From 3309bfb51800e73708d9f7c9752310a12dc318db Mon Sep 17 00:00:00 2001 From: Tanmay Rustagi <88379306+tanmay-db@users.noreply.github.com> Date: Wed, 20 Sep 2023 17:38:47 +0200 Subject: [PATCH] Updated SDK to changes in OpenAPI specification (#355) ## Changes Updating SDK to changes in OpenAPI. To be merged after: https://github.com/databricks/databricks-sdk-py/pull/354. ## Tests Integration tests running... - [ ] `make test` run locally - [ ] `make fmt` applied - [ ] relevant integration tests applied --- .codegen/_openapi_sha | 2 +- databricks/sdk/__init__.py | 4 + databricks/sdk/service/billing.py | 8 +- databricks/sdk/service/catalog.py | 66 +-- databricks/sdk/service/compute.py | 40 +- databricks/sdk/service/jobs.py | 207 ++++++--- databricks/sdk/service/ml.py | 67 ++- databricks/sdk/service/oauth2.py | 103 ++++- databricks/sdk/service/provisioning.py | 7 +- databricks/sdk/service/serving.py | 94 +++- databricks/sdk/service/settings.py | 148 ++++++- databricks/sdk/service/sharing.py | 5 +- databricks/sdk/service/sql.py | 478 +++++++++------------ docs/account/account-oauth2.rst | 1 + docs/account/custom_app_integration.rst | 3 - docs/account/ip_access_lists.rst | 15 +- docs/account/metastore_assignments.rst | 2 +- docs/account/o_auth_published_apps.rst | 21 + docs/account/published_app_integration.rst | 5 +- docs/workspace/artifact_allowlists.rst | 9 +- docs/workspace/clusters.rst | 26 +- docs/workspace/credentials_manager.rst | 21 + docs/workspace/ip_access_lists.rst | 15 +- docs/workspace/jobs.rst | 73 ++-- docs/workspace/model_registry.rst | 16 +- docs/workspace/serving_endpoints.rst | 20 +- docs/workspace/statement_execution.rst | 309 ++++++------- docs/workspace/workspace-settings.rst | 1 + 28 files changed, 1103 insertions(+), 663 deletions(-) create mode 100644 docs/account/o_auth_published_apps.rst create mode 100644 docs/workspace/credentials_manager.rst diff --git a/.codegen/_openapi_sha b/.codegen/_openapi_sha index b59218d39..d37a40f1a 100644 --- a/.codegen/_openapi_sha +++ b/.codegen/_openapi_sha @@ -1 +1 @@ -09a7fa63d9ae243e5407941f200960ca14d48b07 \ No newline at end of file +b52a3b410976501f08f76ca0b355fb2dca876953 \ No newline at end of file diff --git a/databricks/sdk/__init__.py b/databricks/sdk/__init__.py index 1004625e1..89a6c65cd 100755 --- a/databricks/sdk/__init__.py +++ b/databricks/sdk/__init__.py @@ -37,6 +37,7 @@ from databricks.sdk.service.ml import ExperimentsAPI, ModelRegistryAPI from databricks.sdk.service.oauth2 import (CustomAppIntegrationAPI, OAuthEnrollmentAPI, + OAuthPublishedAppsAPI, PublishedAppIntegrationAPI, ServicePrincipalSecretsAPI) from databricks.sdk.service.pipelines import PipelinesAPI @@ -49,6 +50,7 @@ from databricks.sdk.service.settings import (AccountIpAccessListsAPI, AccountNetworkPolicyAPI, AccountSettingsAPI, + CredentialsManagerAPI, IpAccessListsAPI, TokenManagementAPI, TokensAPI, WorkspaceConfAPI) @@ -141,6 +143,7 @@ def __init__(self, self.clusters = ClustersExt(self.api_client) self.command_execution = CommandExecutionAPI(self.api_client) self.connections = ConnectionsAPI(self.api_client) + self.credentials_manager = CredentialsManagerAPI(self.api_client) self.current_user = CurrentUserAPI(self.api_client) self.dashboard_widgets = DashboardWidgetsAPI(self.api_client) self.dashboards = DashboardsAPI(self.api_client) @@ -258,6 +261,7 @@ def __init__(self, self.network_policy = AccountNetworkPolicyAPI(self.api_client) self.networks = NetworksAPI(self.api_client) self.o_auth_enrollment = OAuthEnrollmentAPI(self.api_client) + 
self.o_auth_published_apps = OAuthPublishedAppsAPI(self.api_client) self.private_access = PrivateAccessAPI(self.api_client) self.published_app_integration = PublishedAppIntegrationAPI(self.api_client) self.service_principal_secrets = ServicePrincipalSecretsAPI(self.api_client) diff --git a/databricks/sdk/service/billing.py b/databricks/sdk/service/billing.py index 8bd2e0497..3941516e7 100755 --- a/databricks/sdk/service/billing.py +++ b/databricks/sdk/service/billing.py @@ -181,7 +181,13 @@ def from_dict(cls, d: Dict[str, any]) -> 'CreateLogDeliveryConfigurationParams': class DeliveryStatus(Enum): - """This describes an enum""" + """The status string for log delivery. Possible values are: * `CREATED`: There were no log delivery + attempts since the config was created. * `SUCCEEDED`: The latest attempt of log delivery has + succeeded completely. * `USER_FAILURE`: The latest attempt of log delivery failed because of + misconfiguration of customer provided permissions on role or storage. * `SYSTEM_FAILURE`: The + latest attempt of log delivery failed because of an Databricks internal error. Contact support + if it doesn't go away soon. * `NOT_FOUND`: The log delivery status as the configuration has been + disabled since the release of this feature or there are no workspaces in the account.""" CREATED = 'CREATED' NOT_FOUND = 'NOT_FOUND' diff --git a/databricks/sdk/service/catalog.py b/databricks/sdk/service/catalog.py index 2a5fbaf91..c62e6cb8e 100755 --- a/databricks/sdk/service/catalog.py +++ b/databricks/sdk/service/catalog.py @@ -164,14 +164,14 @@ def from_dict(cls, d: Dict[str, any]) -> 'AccountsUpdateStorageCredential': @dataclass class ArtifactAllowlistInfo: - artifact_matchers: Optional['ArtifactMatcher'] = None + artifact_matchers: Optional['List[ArtifactMatcher]'] = None created_at: Optional[int] = None created_by: Optional[str] = None metastore_id: Optional[str] = None def as_dict(self) -> dict: body = {} - if self.artifact_matchers: body['artifact_matchers'] = self.artifact_matchers.as_dict() + if self.artifact_matchers: body['artifact_matchers'] = [v.as_dict() for v in self.artifact_matchers] if self.created_at is not None: body['created_at'] = self.created_at if self.created_by is not None: body['created_by'] = self.created_by if self.metastore_id is not None: body['metastore_id'] = self.metastore_id @@ -179,7 +179,7 @@ def as_dict(self) -> dict: @classmethod def from_dict(cls, d: Dict[str, any]) -> 'ArtifactAllowlistInfo': - return cls(artifact_matchers=_from_dict(d, 'artifact_matchers', ArtifactMatcher), + return cls(artifact_matchers=_repeated(d, 'artifact_matchers', ArtifactMatcher), created_at=d.get('created_at', None), created_by=d.get('created_by', None), metastore_id=d.get('metastore_id', None)) @@ -1004,14 +1004,6 @@ def from_dict(cls, d: Dict[str, any]) -> 'DatabricksGcpServiceAccountResponse': return cls(credential_id=d.get('credential_id', None), email=d.get('email', None)) -@dataclass -class DeleteModelVersionRequest: - """Delete a Model Version""" - - full_name: str - version: int - - @dataclass class DeltaRuntimePropertiesKvPairs: """Properties pertaining to the current state of the delta table as given by the commit server. 
@@ -1451,14 +1443,6 @@ class FunctionParameterType(Enum): PARAM = 'PARAM' -@dataclass -class GetByAliasRequest: - """Get Model Version By Alias""" - - full_name: str - alias: str - - @dataclass class GetMetastoreSummaryResponse: cloud: Optional[str] = None @@ -1540,14 +1524,6 @@ class GetMetastoreSummaryResponseDeltaSharingScope(Enum): INTERNAL_AND_EXTERNAL = 'INTERNAL_AND_EXTERNAL' -@dataclass -class GetModelVersionRequest: - """Get a Model Version""" - - full_name: str - version: int - - class IsolationMode(Enum): """Whether the current securable is accessible from all workspaces or a specific set of workspaces.""" @@ -1626,15 +1602,6 @@ def from_dict(cls, d: Dict[str, any]) -> 'ListMetastoresResponse': return cls(metastores=_repeated(d, 'metastores', MetastoreInfo)) -@dataclass -class ListModelVersionsRequest: - """List Model Versions""" - - full_name: str - max_results: Optional[int] = None - page_token: Optional[str] = None - - @dataclass class ListModelVersionsResponse: model_versions: Optional['List[ModelVersionInfo]'] = None @@ -2023,6 +1990,7 @@ class Privilege(Enum): CREATE_TABLE = 'CREATE_TABLE' CREATE_VIEW = 'CREATE_VIEW' EXECUTE = 'EXECUTE' + MANAGE_ALLOWLIST = 'MANAGE_ALLOWLIST' MODIFY = 'MODIFY' READ_FILES = 'READ_FILES' READ_PRIVATE_FILES = 'READ_PRIVATE_FILES' @@ -2238,22 +2206,23 @@ class SecurableType(Enum): SHARE = 'share' STORAGE_CREDENTIAL = 'storage_credential' TABLE = 'table' + VOLUME = 'volume' @dataclass class SetArtifactAllowlist: - artifact_matchers: 'ArtifactMatcher' + artifact_matchers: 'List[ArtifactMatcher]' artifact_type: Optional['ArtifactType'] = None def as_dict(self) -> dict: body = {} - if self.artifact_matchers: body['artifact_matchers'] = self.artifact_matchers.as_dict() + if self.artifact_matchers: body['artifact_matchers'] = [v.as_dict() for v in self.artifact_matchers] if self.artifact_type is not None: body['artifact_type'] = self.artifact_type.value return body @classmethod def from_dict(cls, d: Dict[str, any]) -> 'SetArtifactAllowlist': - return cls(artifact_matchers=_from_dict(d, 'artifact_matchers', ArtifactMatcher), + return cls(artifact_matchers=_repeated(d, 'artifact_matchers', ArtifactMatcher), artifact_type=_enum(d, 'artifact_type', ArtifactType)) @@ -3201,7 +3170,7 @@ def get(self, workspace_id: int) -> AccountsMetastoreAssignment: headers=headers) return AccountsMetastoreAssignment.from_dict(res) - def list(self, metastore_id: str) -> Iterator['MetastoreAssignment']: + def list(self, metastore_id: str) -> Iterator[int]: """Get all workspaces assigned to a metastore. Gets a list of all Databricks workspace IDs that have been assigned to given metastore. @@ -3209,14 +3178,14 @@ def list(self, metastore_id: str) -> Iterator['MetastoreAssignment']: :param metastore_id: str Unity Catalog metastore ID - :returns: Iterator over :class:`MetastoreAssignment` + :returns: Iterator over int """ headers = {'Accept': 'application/json', } res = self._api.do('GET', f'/api/2.0/accounts/{self._api.account_id}/metastores/{metastore_id}/workspaces', headers=headers) - return [MetastoreAssignment.from_dict(v) for v in res] + return [WorkspaceId.from_dict(v) for v in res] def update(self, workspace_id: int, @@ -3484,7 +3453,8 @@ def __init__(self, api_client): def get(self, artifact_type: ArtifactType) -> ArtifactAllowlistInfo: """Get an artifact allowlist. - Get the artifact allowlist of a certain artifact type. The caller must be a metastore admin. + Get the artifact allowlist of a certain artifact type. 
The caller must be a metastore admin or have + the **MANAGE ALLOWLIST** privilege on the metastore. :param artifact_type: :class:`ArtifactType` The artifact type of the allowlist. @@ -3498,21 +3468,23 @@ def get(self, artifact_type: ArtifactType) -> ArtifactAllowlistInfo: headers=headers) return ArtifactAllowlistInfo.from_dict(res) - def update(self, artifact_matchers: ArtifactMatcher, + def update(self, artifact_matchers: List[ArtifactMatcher], artifact_type: ArtifactType) -> ArtifactAllowlistInfo: """Set an artifact allowlist. Set the artifact allowlist of a certain artifact type. The whole artifact allowlist is replaced with - the new allowlist. The caller must be a metastore admin. + the new allowlist. The caller must be a metastore admin or have the **MANAGE ALLOWLIST** privilege on + the metastore. - :param artifact_matchers: :class:`ArtifactMatcher` + :param artifact_matchers: List[:class:`ArtifactMatcher`] + A list of allowed artifact match patterns. :param artifact_type: :class:`ArtifactType` The artifact type of the allowlist. :returns: :class:`ArtifactAllowlistInfo` """ body = {} - if artifact_matchers is not None: body['artifact_matchers'] = artifact_matchers.as_dict() + if artifact_matchers is not None: body['artifact_matchers'] = [v.as_dict() for v in artifact_matchers] headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } res = self._api.do('PUT', f'/api/2.1/unity-catalog/artifact-allowlists/{artifact_type.value}', diff --git a/databricks/sdk/service/compute.py b/databricks/sdk/service/compute.py index 506a7909c..882e92e97 100755 --- a/databricks/sdk/service/compute.py +++ b/databricks/sdk/service/compute.py @@ -1296,7 +1296,19 @@ class DataPlaneEventDetailsEventType(Enum): class DataSecurityMode(Enum): - """This describes an enum""" + """Data security mode decides what data governance model to use when accessing data from a cluster. + + * `NONE`: No security isolation for multiple users sharing the cluster. Data governance features + are not available in this mode. * `SINGLE_USER`: A secure cluster that can only be exclusively + used by a single user specified in `single_user_name`. Most programming languages, cluster + features and data governance features are available in this mode. * `USER_ISOLATION`: A secure + cluster that can be shared by multiple users. Cluster users are fully isolated so that they + cannot see each other's data and credentials. Most data governance features are supported in + this mode. But programming languages and cluster features might be limited. * + `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters. * + `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high + concurrency clusters. * `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy + Passthrough on standard clusters.""" LEGACY_PASSTHROUGH = 'LEGACY_PASSTHROUGH' LEGACY_SINGLE_USER = 'LEGACY_SINGLE_USER' @@ -3876,7 +3888,18 @@ def create(self, - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags :param data_security_mode: :class:`DataSecurityMode` (optional) - This describes an enum + Data security mode decides what data governance model to use when accessing data from a cluster. + + * `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are + not available in this mode. * `SINGLE_USER`: A secure cluster that can only be exclusively used by a + single user specified in `single_user_name`. 
Most programming languages, cluster features and data + governance features are available in this mode. * `USER_ISOLATION`: A secure cluster that can be + shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data + and credentials. Most data governance features are supported in this mode. But programming languages + and cluster features might be limited. * `LEGACY_TABLE_ACL`: This mode is for users migrating from + legacy Table ACL clusters. * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy + Passthrough on high concurrency clusters. * `LEGACY_SINGLE_USER`: This mode is for users migrating + from legacy Passthrough on standard clusters. :param docker_image: :class:`DockerImage` (optional) :param driver_instance_pool_id: str (optional) The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses @@ -4152,7 +4175,18 @@ def edit(self, - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags :param data_security_mode: :class:`DataSecurityMode` (optional) - This describes an enum + Data security mode decides what data governance model to use when accessing data from a cluster. + + * `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are + not available in this mode. * `SINGLE_USER`: A secure cluster that can only be exclusively used by a + single user specified in `single_user_name`. Most programming languages, cluster features and data + governance features are available in this mode. * `USER_ISOLATION`: A secure cluster that can be + shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data + and credentials. Most data governance features are supported in this mode. But programming languages + and cluster features might be limited. * `LEGACY_TABLE_ACL`: This mode is for users migrating from + legacy Table ACL clusters. * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy + Passthrough on high concurrency clusters. * `LEGACY_SINGLE_USER`: This mode is for users migrating + from legacy Passthrough on standard clusters. :param docker_image: :class:`DockerImage` (optional) :param driver_instance_pool_id: str (optional) The optional ID of the instance pool for the driver of the cluster belongs. 
The pool cluster uses diff --git a/databricks/sdk/service/jobs.py b/databricks/sdk/service/jobs.py index 9a8243be5..cf91ed9be 100755 --- a/databricks/sdk/service/jobs.py +++ b/databricks/sdk/service/jobs.py @@ -136,16 +136,18 @@ def from_dict(cls, d: Dict[str, any]) -> 'BaseRun': @dataclass class CancelAllRuns: - job_id: int + all_queued_runs: Optional[bool] = None + job_id: Optional[int] = None def as_dict(self) -> dict: body = {} + if self.all_queued_runs is not None: body['all_queued_runs'] = self.all_queued_runs if self.job_id is not None: body['job_id'] = self.job_id return body @classmethod def from_dict(cls, d: Dict[str, any]) -> 'CancelAllRuns': - return cls(job_id=d.get('job_id', None)) + return cls(all_queued_runs=d.get('all_queued_runs', None), job_id=d.get('job_id', None)) @dataclass @@ -263,6 +265,7 @@ class CreateJob: name: Optional[str] = None notification_settings: Optional['JobNotificationSettings'] = None parameters: Optional['List[JobParameterDefinition]'] = None + queue: Optional['QueueSettings'] = None run_as: Optional['JobRunAs'] = None schedule: Optional['CronSchedule'] = None tags: Optional['Dict[str,str]'] = None @@ -286,6 +289,7 @@ def as_dict(self) -> dict: if self.name is not None: body['name'] = self.name if self.notification_settings: body['notification_settings'] = self.notification_settings.as_dict() if self.parameters: body['parameters'] = [v.as_dict() for v in self.parameters] + if self.queue: body['queue'] = self.queue.as_dict() if self.run_as: body['run_as'] = self.run_as.as_dict() if self.schedule: body['schedule'] = self.schedule.as_dict() if self.tags: body['tags'] = self.tags @@ -309,6 +313,7 @@ def from_dict(cls, d: Dict[str, any]) -> 'CreateJob': name=d.get('name', None), notification_settings=_from_dict(d, 'notification_settings', JobNotificationSettings), parameters=_repeated(d, 'parameters', JobParameterDefinition), + queue=_from_dict(d, 'queue', QueueSettings), run_as=_from_dict(d, 'run_as', JobRunAs), schedule=_from_dict(d, 'schedule', CronSchedule), tags=d.get('tags', None), @@ -873,6 +878,7 @@ class JobSettings: name: Optional[str] = None notification_settings: Optional['JobNotificationSettings'] = None parameters: Optional['List[JobParameterDefinition]'] = None + queue: Optional['QueueSettings'] = None run_as: Optional['JobRunAs'] = None schedule: Optional['CronSchedule'] = None tags: Optional['Dict[str,str]'] = None @@ -894,6 +900,7 @@ def as_dict(self) -> dict: if self.name is not None: body['name'] = self.name if self.notification_settings: body['notification_settings'] = self.notification_settings.as_dict() if self.parameters: body['parameters'] = [v.as_dict() for v in self.parameters] + if self.queue: body['queue'] = self.queue.as_dict() if self.run_as: body['run_as'] = self.run_as.as_dict() if self.schedule: body['schedule'] = self.schedule.as_dict() if self.tags: body['tags'] = self.tags @@ -916,6 +923,7 @@ def from_dict(cls, d: Dict[str, any]) -> 'JobSettings': name=d.get('name', None), notification_settings=_from_dict(d, 'notification_settings', JobNotificationSettings), parameters=_repeated(d, 'parameters', JobParameterDefinition), + queue=_from_dict(d, 'queue', QueueSettings), run_as=_from_dict(d, 'run_as', JobRunAs), schedule=_from_dict(d, 'schedule', CronSchedule), tags=d.get('tags', None), @@ -949,7 +957,14 @@ def from_dict(cls, d: Dict[str, any]) -> 'JobSource': class JobSourceDirtyState(Enum): - """This describes an enum""" + """Dirty state indicates the job is not fully synced with the job specification in the remote + 
repository. + + Possible values are: * `NOT_SYNCED`: The job is not yet synced with the remote job + specification. Import the remote job specification from UI to make the job fully synced. * + `DISCONNECTED`: The job is temporary disconnected from the remote job specification and is + allowed for live edit. Import the remote job specification again from UI to make the job fully + synced.""" DISCONNECTED = 'DISCONNECTED' NOT_SYNCED = 'NOT_SYNCED' @@ -1050,7 +1065,11 @@ def from_dict(cls, d: Dict[str, any]) -> 'ListRunsResponse': class ListRunsRunType(Enum): - """This describes an enum""" + """* `JOB_RUN`: Normal job run. A run created with :method:jobs/runNow. * `WORKFLOW_RUN`: Workflow + run. A run created with [dbutils.notebook.run]. * `SUBMIT_RUN`: Submit run. A run created with + :method:jobs/submit. + + [dbutils.notebook.run]: https://docs.databricks.com/dev-tools/databricks-utils.html#dbutils-workflow""" JOB_RUN = 'JOB_RUN' SUBMIT_RUN = 'SUBMIT_RUN' @@ -1155,6 +1174,20 @@ def from_dict(cls, d: Dict[str, any]) -> 'PythonWheelTask': parameters=d.get('parameters', None)) +@dataclass +class QueueSettings: + enabled: bool + + def as_dict(self) -> dict: + body = {} + if self.enabled is not None: body['enabled'] = self.enabled + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> 'QueueSettings': + return cls(enabled=d.get('enabled', None)) + + @dataclass class RepairHistoryItem: end_time: Optional[int] = None @@ -1548,7 +1581,14 @@ class RunConditionTaskOp(Enum): class RunIf(Enum): - """This describes an enum""" + """An optional value indicating the condition that determines whether the task should be run once + its dependencies have been completed. When omitted, defaults to `ALL_SUCCESS`. + + Possible values are: * `ALL_SUCCESS`: All dependencies have executed and succeeded * + `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded * `NONE_FAILED`: None of the + dependencies have failed and at least one was executed * `ALL_DONE`: All dependencies have been + completed * `AT_LEAST_ONE_FAILED`: At least one dependency failed * `ALL_FAILED`: ALl + dependencies have failed""" ALL_DONE = 'ALL_DONE' ALL_FAILED = 'ALL_FAILED' @@ -1589,11 +1629,22 @@ def from_dict(cls, d: Dict[str, any]) -> 'RunJobTask': class RunLifeCycleState(Enum): - """This describes an enum""" + """A value indicating the run's lifecycle state. The possible values are: * `QUEUED`: The run is + queued. * `PENDING`: The run is waiting to be executed while the cluster and execution context + are being prepared. * `RUNNING`: The task of this run is being executed. * `TERMINATING`: The + task of this run has completed, and the cluster and execution context are being cleaned up. * + `TERMINATED`: The task of this run has completed, and the cluster and execution context have + been cleaned up. This state is terminal. * `SKIPPED`: This run was aborted because a previous + run of the same job was already active. This state is terminal. * `INTERNAL_ERROR`: An + exceptional state that indicates a failure in the Jobs service, such as network failure over a + long period. If a run on a new cluster ends in the `INTERNAL_ERROR` state, the Jobs service + terminates the cluster as soon as possible. This state is terminal. * `BLOCKED`: The run is + blocked on an upstream dependency. 
* `WAITING_FOR_RETRY`: The run is waiting for a retry.""" BLOCKED = 'BLOCKED' INTERNAL_ERROR = 'INTERNAL_ERROR' PENDING = 'PENDING' + QUEUED = 'QUEUED' RUNNING = 'RUNNING' SKIPPED = 'SKIPPED' TERMINATED = 'TERMINATED' @@ -1612,6 +1663,7 @@ class RunNow: pipeline_params: Optional['PipelineParams'] = None python_named_params: Optional['Dict[str,str]'] = None python_params: Optional['List[str]'] = None + queue: Optional['QueueSettings'] = None spark_submit_params: Optional['List[str]'] = None sql_params: Optional['Dict[str,str]'] = None @@ -1626,6 +1678,7 @@ def as_dict(self) -> dict: if self.pipeline_params: body['pipeline_params'] = self.pipeline_params.as_dict() if self.python_named_params: body['python_named_params'] = self.python_named_params if self.python_params: body['python_params'] = [v for v in self.python_params] + if self.queue: body['queue'] = self.queue.as_dict() if self.spark_submit_params: body['spark_submit_params'] = [v for v in self.spark_submit_params] if self.sql_params: body['sql_params'] = self.sql_params return body @@ -1641,6 +1694,7 @@ def from_dict(cls, d: Dict[str, any]) -> 'RunNow': pipeline_params=_from_dict(d, 'pipeline_params', PipelineParams), python_named_params=d.get('python_named_params', None), python_params=d.get('python_params', None), + queue=_from_dict(d, 'queue', QueueSettings), spark_submit_params=d.get('spark_submit_params', None), sql_params=d.get('sql_params', None)) @@ -1738,7 +1792,14 @@ def from_dict(cls, d: Dict[str, any]) -> 'RunParameters': class RunResultState(Enum): - """This describes an enum""" + """A value indicating the run's result. The possible values are: * `SUCCESS`: The task completed + successfully. * `FAILED`: The task completed with an error. * `TIMEDOUT`: The run was stopped + after reaching the timeout. * `CANCELED`: The run was canceled at user request. * + `MAXIMUM_CONCURRENT_RUNS_REACHED`: The run was skipped because the maximum concurrent runs were + reached. * `EXCLUDED`: The run was skipped because the necessary conditions were not met. * + `SUCCESS_WITH_FAILURES`: The job run completed successfully with some failures; leaf tasks were + successful. * `UPSTREAM_FAILED`: The run was skipped because of an upstream failure. 
* + `UPSTREAM_CANCELED`: The run was skipped because an upstream task was canceled.""" CANCELED = 'CANCELED' EXCLUDED = 'EXCLUDED' @@ -1756,6 +1817,7 @@ class RunState: """The current state of the run.""" life_cycle_state: Optional['RunLifeCycleState'] = None + queue_reason: Optional[str] = None result_state: Optional['RunResultState'] = None state_message: Optional[str] = None user_cancelled_or_timedout: Optional[bool] = None @@ -1763,6 +1825,7 @@ class RunState: def as_dict(self) -> dict: body = {} if self.life_cycle_state is not None: body['life_cycle_state'] = self.life_cycle_state.value + if self.queue_reason is not None: body['queue_reason'] = self.queue_reason if self.result_state is not None: body['result_state'] = self.result_state.value if self.state_message is not None: body['state_message'] = self.state_message if self.user_cancelled_or_timedout is not None: @@ -1772,6 +1835,7 @@ def as_dict(self) -> dict: @classmethod def from_dict(cls, d: Dict[str, any]) -> 'RunState': return cls(life_cycle_state=_enum(d, 'life_cycle_state', RunLifeCycleState), + queue_reason=d.get('queue_reason', None), result_state=_enum(d, 'result_state', RunResultState), state_message=d.get('state_message', None), user_cancelled_or_timedout=d.get('user_cancelled_or_timedout', None)) @@ -1795,6 +1859,7 @@ class RunTask: notebook_task: Optional['NotebookTask'] = None pipeline_task: Optional['PipelineTask'] = None python_wheel_task: Optional['PythonWheelTask'] = None + queue_duration: Optional[int] = None resolved_values: Optional['ResolvedValues'] = None run_id: Optional[int] = None run_if: Optional['RunIf'] = None @@ -1826,6 +1891,7 @@ def as_dict(self) -> dict: if self.notebook_task: body['notebook_task'] = self.notebook_task.as_dict() if self.pipeline_task: body['pipeline_task'] = self.pipeline_task.as_dict() if self.python_wheel_task: body['python_wheel_task'] = self.python_wheel_task.as_dict() + if self.queue_duration is not None: body['queue_duration'] = self.queue_duration if self.resolved_values: body['resolved_values'] = self.resolved_values.as_dict() if self.run_id is not None: body['run_id'] = self.run_id if self.run_if is not None: body['run_if'] = self.run_if.value @@ -1858,6 +1924,7 @@ def from_dict(cls, d: Dict[str, any]) -> 'RunTask': notebook_task=_from_dict(d, 'notebook_task', NotebookTask), pipeline_task=_from_dict(d, 'pipeline_task', PipelineTask), python_wheel_task=_from_dict(d, 'python_wheel_task', PythonWheelTask), + queue_duration=d.get('queue_duration', None), resolved_values=_from_dict(d, 'resolved_values', ResolvedValues), run_id=d.get('run_id', None), run_if=_enum(d, 'run_if', RunIf), @@ -1873,7 +1940,11 @@ def from_dict(cls, d: Dict[str, any]) -> 'RunTask': class RunType(Enum): - """This describes an enum""" + """* `JOB_RUN`: Normal job run. A run created with :method:jobs/runNow. * `WORKFLOW_RUN`: Workflow + run. A run created with [dbutils.notebook.run]. * `SUBMIT_RUN`: Submit run. A run created with + :method:jobs/submit. 
+ + [dbutils.notebook.run]: https://docs.databricks.com/dev-tools/databricks-utils.html#dbutils-workflow""" JOB_RUN = 'JOB_RUN' SUBMIT_RUN = 'SUBMIT_RUN' @@ -2231,6 +2302,7 @@ class SubmitRun: health: Optional['JobsHealthRules'] = None idempotency_token: Optional[str] = None notification_settings: Optional['JobNotificationSettings'] = None + queue: Optional['QueueSettings'] = None run_name: Optional[str] = None tasks: Optional['List[SubmitTask]'] = None timeout_seconds: Optional[int] = None @@ -2245,6 +2317,7 @@ def as_dict(self) -> dict: if self.health: body['health'] = self.health.as_dict() if self.idempotency_token is not None: body['idempotency_token'] = self.idempotency_token if self.notification_settings: body['notification_settings'] = self.notification_settings.as_dict() + if self.queue: body['queue'] = self.queue.as_dict() if self.run_name is not None: body['run_name'] = self.run_name if self.tasks: body['tasks'] = [v.as_dict() for v in self.tasks] if self.timeout_seconds is not None: body['timeout_seconds'] = self.timeout_seconds @@ -2259,6 +2332,7 @@ def from_dict(cls, d: Dict[str, any]) -> 'SubmitRun': health=_from_dict(d, 'health', JobsHealthRules), idempotency_token=d.get('idempotency_token', None), notification_settings=_from_dict(d, 'notification_settings', JobNotificationSettings), + queue=_from_dict(d, 'queue', QueueSettings), run_name=d.get('run_name', None), tasks=_repeated(d, 'tasks', SubmitTask), timeout_seconds=d.get('timeout_seconds', None), @@ -2568,7 +2642,15 @@ def from_dict(cls, d: Dict[str, any]) -> 'TriggerSettings': class TriggerType(Enum): - """This describes an enum""" + """The type of trigger that fired this run. + + * `PERIODIC`: Schedules that periodically trigger runs, such as a cron scheduler. * `ONE_TIME`: + One time triggers that fire a single run. This occurs you triggered a single run on demand + through the UI or the API. * `RETRY`: Indicates a run that is triggered as a retry of a + previously failed run. This occurs when you request to re-run the job in case of failures. * + `RUN_JOB_TASK`: Indicates a run that is triggered using a Run Job task. + + * `FILE_ARRIVAL`: Indicates a run that is triggered by a file arrival.""" FILE_ARRIVAL = 'FILE_ARRIVAL' ONE_TIME = 'ONE_TIME' @@ -2616,14 +2698,15 @@ def from_dict(cls, d: Dict[str, any]) -> 'ViewItem': class ViewType(Enum): - """This describes an enum""" + """* `NOTEBOOK`: Notebook view item. * `DASHBOARD`: Dashboard view item.""" DASHBOARD = 'DASHBOARD' NOTEBOOK = 'NOTEBOOK' class ViewsToExport(Enum): - """This describes an enum""" + """* `CODE`: Code view of the notebook. * `DASHBOARDS`: All dashboard views of the notebook. * + `ALL`: All views of the notebook.""" ALL = 'ALL' CODE = 'CODE' @@ -2739,18 +2822,22 @@ def wait_get_run_job_terminated_or_skipped(self, attempt += 1 raise TimeoutError(f'timed out after {timeout}: {status_message}') - def cancel_all_runs(self, job_id: int): + def cancel_all_runs(self, *, all_queued_runs: Optional[bool] = None, job_id: Optional[int] = None): """Cancel all runs of a job. Cancels all active runs of a job. The runs are canceled asynchronously, so it doesn't prevent new runs from being started. - :param job_id: int - The canonical identifier of the job to cancel all runs of. This field is required. + :param all_queued_runs: bool (optional) + Optional boolean parameter to cancel all queued runs. If no job_id is provided, all queued runs in + the workspace are canceled. + :param job_id: int (optional) + The canonical identifier of the job to cancel all runs of. 
""" body = {} + if all_queued_runs is not None: body['all_queued_runs'] = all_queued_runs if job_id is not None: body['job_id'] = job_id headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } self._api.do('POST', '/api/2.1/jobs/runs/cancel-all', body=body, headers=headers) @@ -2791,6 +2878,7 @@ def create(self, name: Optional[str] = None, notification_settings: Optional[JobNotificationSettings] = None, parameters: Optional[List[JobParameterDefinition]] = None, + queue: Optional[QueueSettings] = None, run_as: Optional[JobRunAs] = None, schedule: Optional[CronSchedule] = None, tags: Optional[Dict[str, str]] = None, @@ -2850,6 +2938,8 @@ def create(self, `email_notifications` and `webhook_notifications` for this job. :param parameters: List[:class:`JobParameterDefinition`] (optional) Job-level parameter definitions + :param queue: :class:`QueueSettings` (optional) + The queue settings of the job. :param run_as: :class:`JobRunAs` (optional) Write-only setting, available only in Create/Update/Reset and Submit calls. Specifies the user or service principal that the job runs as. If not specified, the job runs as the user who created the @@ -2891,6 +2981,7 @@ def create(self, if name is not None: body['name'] = name if notification_settings is not None: body['notification_settings'] = notification_settings.as_dict() if parameters is not None: body['parameters'] = [v.as_dict() for v in parameters] + if queue is not None: body['queue'] = queue.as_dict() if run_as is not None: body['run_as'] = run_as.as_dict() if schedule is not None: body['schedule'] = schedule.as_dict() if tags is not None: body['tags'] = tags @@ -3108,8 +3199,8 @@ def list_runs(self, :param active_only: bool (optional) If active_only is `true`, only active runs are included in the results; otherwise, lists both active - and completed runs. An active run is a run in the `PENDING`, `RUNNING`, or `TERMINATING`. This field - cannot be `true` when completed_only is `true`. + and completed runs. An active run is a run in the `QUEUED`, `PENDING`, `RUNNING`, or `TERMINATING`. + This field cannot be `true` when completed_only is `true`. :param completed_only: bool (optional) If completed_only is `true`, only completed runs are included in the results; otherwise, lists both active and completed runs. This field cannot be `true` when active_only is `true`. @@ -3188,11 +3279,11 @@ def repair_run(self, An array of commands to execute for jobs with the dbt task, for example `"dbt_commands": ["dbt deps", "dbt seed", "dbt run"]` :param jar_params: List[str] (optional) - A list of parameters for jobs with Spark JAR tasks, for example `\"jar_params\": [\"john doe\", - \"35\"]`. The parameters are used to invoke the main function of the main class specified in the - Spark JAR task. If not specified upon `run-now`, it defaults to an empty list. jar_params cannot be - specified in conjunction with notebook_params. The JSON representation of this field (for example - `{\"jar_params\":[\"john doe\",\"35\"]}`) cannot exceed 10,000 bytes. + A list of parameters for jobs with Spark JAR tasks, for example `"jar_params": ["john doe", "35"]`. + The parameters are used to invoke the main function of the main class specified in the Spark JAR + task. If not specified upon `run-now`, it defaults to an empty list. jar_params cannot be specified + in conjunction with notebook_params. The JSON representation of this field (for example + `{"jar_params":["john doe","35"]}`) cannot exceed 10,000 bytes. 
Use [Task parameter variables](/jobs.html"#parameter-variables") to set parameters containing information about job runs. @@ -3200,8 +3291,8 @@ def repair_run(self, The ID of the latest repair. This parameter is not required when repairing a run for the first time, but must be provided on subsequent requests to repair the same run. :param notebook_params: Dict[str,str] (optional) - A map from keys to values for jobs with notebook task, for example `\"notebook_params\": {\"name\": - \"john doe\", \"age\": \"35\"}`. The map is passed to the notebook and is accessible through the + A map from keys to values for jobs with notebook task, for example `"notebook_params": {"name": + "john doe", "age": "35"}`. The map is passed to the notebook and is accessible through the [dbutils.widgets.get] function. If not specified upon `run-now`, the triggered run uses the job’s base parameters. @@ -3210,8 +3301,8 @@ def repair_run(self, Use [Task parameter variables] to set parameters containing information about job runs. - The JSON representation of this field (for example `{\"notebook_params\":{\"name\":\"john - doe\",\"age\":\"35\"}}`) cannot exceed 10,000 bytes. + The JSON representation of this field (for example `{"notebook_params":{"name":"john + doe","age":"35"}}`) cannot exceed 10,000 bytes. [Task parameter variables]: https://docs.databricks.com/jobs.html#parameter-variables [dbutils.widgets.get]: https://docs.databricks.com/dev-tools/databricks-utils.html @@ -3220,10 +3311,10 @@ def repair_run(self, A map from keys to values for jobs with Python wheel task, for example `"python_named_params": {"name": "task", "data": "dbfs:/path/to/data.json"}`. :param python_params: List[str] (optional) - A list of parameters for jobs with Python tasks, for example `\"python_params\": [\"john doe\", - \"35\"]`. The parameters are passed to Python file as command-line parameters. If specified upon - `run-now`, it would overwrite the parameters specified in job setting. The JSON representation of - this field (for example `{\"python_params\":[\"john doe\",\"35\"]}`) cannot exceed 10,000 bytes. + A list of parameters for jobs with Python tasks, for example `"python_params": ["john doe", "35"]`. + The parameters are passed to Python file as command-line parameters. If specified upon `run-now`, it + would overwrite the parameters specified in job setting. The JSON representation of this field (for + example `{"python_params":["john doe","35"]}`) cannot exceed 10,000 bytes. Use [Task parameter variables] to set parameters containing information about job runs. @@ -3242,11 +3333,11 @@ def repair_run(self, :param rerun_tasks: List[str] (optional) The task keys of the task runs to repair. :param spark_submit_params: List[str] (optional) - A list of parameters for jobs with spark submit task, for example `\"spark_submit_params\": - [\"--class\", \"org.apache.spark.examples.SparkPi\"]`. The parameters are passed to spark-submit - script as command-line parameters. If specified upon `run-now`, it would overwrite the parameters - specified in job setting. The JSON representation of this field (for example - `{\"python_params\":[\"john doe\",\"35\"]}`) cannot exceed 10,000 bytes. + A list of parameters for jobs with spark submit task, for example `"spark_submit_params": + ["--class", "org.apache.spark.examples.SparkPi"]`. The parameters are passed to spark-submit script + as command-line parameters. If specified upon `run-now`, it would overwrite the parameters specified + in job setting. 
The JSON representation of this field (for example `{"python_params":["john + doe","35"]}`) cannot exceed 10,000 bytes. Use [Task parameter variables] to set parameters containing information about job runs @@ -3349,6 +3440,7 @@ def run_now(self, pipeline_params: Optional[PipelineParams] = None, python_named_params: Optional[Dict[str, str]] = None, python_params: Optional[List[str]] = None, + queue: Optional[QueueSettings] = None, spark_submit_params: Optional[List[str]] = None, sql_params: Optional[Dict[str, str]] = None) -> Wait[Run]: """Trigger a new job run. @@ -3374,19 +3466,19 @@ def run_now(self, [How to ensure idempotency for jobs]: https://kb.databricks.com/jobs/jobs-idempotency.html :param jar_params: List[str] (optional) - A list of parameters for jobs with Spark JAR tasks, for example `\"jar_params\": [\"john doe\", - \"35\"]`. The parameters are used to invoke the main function of the main class specified in the - Spark JAR task. If not specified upon `run-now`, it defaults to an empty list. jar_params cannot be - specified in conjunction with notebook_params. The JSON representation of this field (for example - `{\"jar_params\":[\"john doe\",\"35\"]}`) cannot exceed 10,000 bytes. + A list of parameters for jobs with Spark JAR tasks, for example `"jar_params": ["john doe", "35"]`. + The parameters are used to invoke the main function of the main class specified in the Spark JAR + task. If not specified upon `run-now`, it defaults to an empty list. jar_params cannot be specified + in conjunction with notebook_params. The JSON representation of this field (for example + `{"jar_params":["john doe","35"]}`) cannot exceed 10,000 bytes. Use [Task parameter variables](/jobs.html"#parameter-variables") to set parameters containing information about job runs. :param job_parameters: List[Dict[str,str]] (optional) Job-level parameters used in the run :param notebook_params: Dict[str,str] (optional) - A map from keys to values for jobs with notebook task, for example `\"notebook_params\": {\"name\": - \"john doe\", \"age\": \"35\"}`. The map is passed to the notebook and is accessible through the + A map from keys to values for jobs with notebook task, for example `"notebook_params": {"name": + "john doe", "age": "35"}`. The map is passed to the notebook and is accessible through the [dbutils.widgets.get] function. If not specified upon `run-now`, the triggered run uses the job’s base parameters. @@ -3395,8 +3487,8 @@ def run_now(self, Use [Task parameter variables] to set parameters containing information about job runs. - The JSON representation of this field (for example `{\"notebook_params\":{\"name\":\"john - doe\",\"age\":\"35\"}}`) cannot exceed 10,000 bytes. + The JSON representation of this field (for example `{"notebook_params":{"name":"john + doe","age":"35"}}`) cannot exceed 10,000 bytes. [Task parameter variables]: https://docs.databricks.com/jobs.html#parameter-variables [dbutils.widgets.get]: https://docs.databricks.com/dev-tools/databricks-utils.html @@ -3405,10 +3497,10 @@ def run_now(self, A map from keys to values for jobs with Python wheel task, for example `"python_named_params": {"name": "task", "data": "dbfs:/path/to/data.json"}`. :param python_params: List[str] (optional) - A list of parameters for jobs with Python tasks, for example `\"python_params\": [\"john doe\", - \"35\"]`. The parameters are passed to Python file as command-line parameters. If specified upon - `run-now`, it would overwrite the parameters specified in job setting. 
The JSON representation of - this field (for example `{\"python_params\":[\"john doe\",\"35\"]}`) cannot exceed 10,000 bytes. + A list of parameters for jobs with Python tasks, for example `"python_params": ["john doe", "35"]`. + The parameters are passed to Python file as command-line parameters. If specified upon `run-now`, it + would overwrite the parameters specified in job setting. The JSON representation of this field (for + example `{"python_params":["john doe","35"]}`) cannot exceed 10,000 bytes. Use [Task parameter variables] to set parameters containing information about job runs. @@ -3419,12 +3511,14 @@ def run_now(self, emojis. [Task parameter variables]: https://docs.databricks.com/jobs.html#parameter-variables + :param queue: :class:`QueueSettings` (optional) + The queue settings of the run. :param spark_submit_params: List[str] (optional) - A list of parameters for jobs with spark submit task, for example `\"spark_submit_params\": - [\"--class\", \"org.apache.spark.examples.SparkPi\"]`. The parameters are passed to spark-submit - script as command-line parameters. If specified upon `run-now`, it would overwrite the parameters - specified in job setting. The JSON representation of this field (for example - `{\"python_params\":[\"john doe\",\"35\"]}`) cannot exceed 10,000 bytes. + A list of parameters for jobs with spark submit task, for example `"spark_submit_params": + ["--class", "org.apache.spark.examples.SparkPi"]`. The parameters are passed to spark-submit script + as command-line parameters. If specified upon `run-now`, it would overwrite the parameters specified + in job setting. The JSON representation of this field (for example `{"python_params":["john + doe","35"]}`) cannot exceed 10,000 bytes. Use [Task parameter variables] to set parameters containing information about job runs @@ -3453,6 +3547,7 @@ def run_now(self, if pipeline_params is not None: body['pipeline_params'] = pipeline_params.as_dict() if python_named_params is not None: body['python_named_params'] = python_named_params if python_params is not None: body['python_params'] = [v for v in python_params] + if queue is not None: body['queue'] = queue.as_dict() if spark_submit_params is not None: body['spark_submit_params'] = [v for v in spark_submit_params] if sql_params is not None: body['sql_params'] = sql_params headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } @@ -3472,6 +3567,7 @@ def run_now_and_wait(self, pipeline_params: Optional[PipelineParams] = None, python_named_params: Optional[Dict[str, str]] = None, python_params: Optional[List[str]] = None, + queue: Optional[QueueSettings] = None, spark_submit_params: Optional[List[str]] = None, sql_params: Optional[Dict[str, str]] = None, timeout=timedelta(minutes=20)) -> Run: @@ -3484,6 +3580,7 @@ def run_now_and_wait(self, pipeline_params=pipeline_params, python_named_params=python_named_params, python_params=python_params, + queue=queue, spark_submit_params=spark_submit_params, sql_params=sql_params).result(timeout=timeout) @@ -3517,6 +3614,7 @@ def submit(self, health: Optional[JobsHealthRules] = None, idempotency_token: Optional[str] = None, notification_settings: Optional[JobNotificationSettings] = None, + queue: Optional[QueueSettings] = None, run_name: Optional[str] = None, tasks: Optional[List[SubmitTask]] = None, timeout_seconds: Optional[int] = None, @@ -3559,6 +3657,8 @@ def submit(self, :param notification_settings: :class:`JobNotificationSettings` (optional) Optional notification settings that are used when 
sending notifications to each of the `webhook_notifications` for this run. + :param queue: :class:`QueueSettings` (optional) + The queue settings of the one-time run. :param run_name: str (optional) An optional name for the run. The default value is `Untitled`. :param tasks: List[:class:`SubmitTask`] (optional) @@ -3579,6 +3679,7 @@ def submit(self, if health is not None: body['health'] = health.as_dict() if idempotency_token is not None: body['idempotency_token'] = idempotency_token if notification_settings is not None: body['notification_settings'] = notification_settings.as_dict() + if queue is not None: body['queue'] = queue.as_dict() if run_name is not None: body['run_name'] = run_name if tasks is not None: body['tasks'] = [v.as_dict() for v in tasks] if timeout_seconds is not None: body['timeout_seconds'] = timeout_seconds @@ -3598,6 +3699,7 @@ def submit_and_wait( health: Optional[JobsHealthRules] = None, idempotency_token: Optional[str] = None, notification_settings: Optional[JobNotificationSettings] = None, + queue: Optional[QueueSettings] = None, run_name: Optional[str] = None, tasks: Optional[List[SubmitTask]] = None, timeout_seconds: Optional[int] = None, @@ -3609,6 +3711,7 @@ def submit_and_wait( health=health, idempotency_token=idempotency_token, notification_settings=notification_settings, + queue=queue, run_name=run_name, tasks=tasks, timeout_seconds=timeout_seconds, diff --git a/databricks/sdk/service/ml.py b/databricks/sdk/service/ml.py index 67dcbea87..51fa708ba 100755 --- a/databricks/sdk/service/ml.py +++ b/databricks/sdk/service/ml.py @@ -54,7 +54,12 @@ def from_dict(cls, d: Dict[str, any]) -> 'Activity': class ActivityAction(Enum): - """This describes an enum""" + """An action that a user (with sufficient permissions) could take on an activity. Valid values are: + * `APPROVE_TRANSITION_REQUEST`: Approve a transition request + + * `REJECT_TRANSITION_REQUEST`: Reject a transition request + + * `CANCEL_TRANSITION_REQUEST`: Cancel (delete) a transition request""" APPROVE_TRANSITION_REQUEST = 'APPROVE_TRANSITION_REQUEST' CANCEL_TRANSITION_REQUEST = 'CANCEL_TRANSITION_REQUEST' @@ -62,7 +67,19 @@ class ActivityAction(Enum): class ActivityType(Enum): - """This describes an enum""" + """Type of activity. Valid values are: * `APPLIED_TRANSITION`: User applied the corresponding stage + transition. + + * `REQUESTED_TRANSITION`: User requested the corresponding stage transition. + + * `CANCELLED_REQUEST`: User cancelled an existing transition request. + + * `APPROVED_REQUEST`: User approved the corresponding stage transition. + + * `REJECTED_REQUEST`: User rejected the coressponding stage transition. + + * `SYSTEM_TRANSITION`: For events performed as a side effect, such as archiving existing model + versions in a stage.""" APPLIED_TRANSITION = 'APPLIED_TRANSITION' APPROVED_REQUEST = 'APPROVED_REQUEST' @@ -115,7 +132,10 @@ def from_dict(cls, d: Dict[str, any]) -> 'ApproveTransitionRequestResponse': class CommentActivityAction(Enum): - """This describes an enum""" + """An action that a user (with sufficient permissions) could take on a comment. Valid values are: * + `EDIT_COMMENT`: Edit the comment + + * `DELETE_COMMENT`: Delete the comment""" DELETE_COMMENT = 'DELETE_COMMENT' EDIT_COMMENT = 'EDIT_COMMENT' @@ -1637,7 +1657,13 @@ class RegistryWebhookEvent(Enum): class RegistryWebhookStatus(Enum): - """This describes an enum""" + """Enable or disable triggering the webhook, or put the webhook into test mode. 
The default is + `ACTIVE`: * `ACTIVE`: Webhook is triggered when an associated event happens. + + * `DISABLED`: Webhook is not triggered. + + * `TEST_MODE`: Webhook can be triggered through the test endpoint, but is not triggered on a + real event.""" ACTIVE = 'ACTIVE' DISABLED = 'DISABLED' @@ -2115,7 +2141,15 @@ def from_dict(cls, d: Dict[str, any]) -> 'SetTag': class Stage(Enum): - """This describes an enum""" + """Stage of the model version. Valid values are: + + * `None`: The initial stage of a model version. + + * `Staging`: Staging or pre-production stage. + + * `Production`: Production stage. + + * `Archived`: Archived stage.""" ARCHIVED = 'Archived' NONE = 'None' @@ -2124,7 +2158,12 @@ class Stage(Enum): class Status(Enum): - """This describes an enum""" + """The status of the model version. Valid values are: * `PENDING_REGISTRATION`: Request to register + a new model version is pending as server performs background tasks. + + * `FAILED_REGISTRATION`: Request to register a new model version has failed. + + * `READY`: Model version is ready for use.""" FAILED_REGISTRATION = 'FAILED_REGISTRATION' PENDING_REGISTRATION = 'PENDING_REGISTRATION' @@ -3504,7 +3543,13 @@ def create_webhook(self, :param model_name: str (optional) Name of the model whose events would trigger this webhook. :param status: :class:`RegistryWebhookStatus` (optional) - This describes an enum + Enable or disable triggering the webhook, or put the webhook into test mode. The default is + `ACTIVE`: * `ACTIVE`: Webhook is triggered when an associated event happens. + + * `DISABLED`: Webhook is not triggered. + + * `TEST_MODE`: Webhook can be triggered through the test endpoint, but is not triggered on a real + event. :returns: :class:`CreateWebhookResponse` """ @@ -4319,7 +4364,13 @@ def update_webhook(self, :param http_url_spec: :class:`HttpUrlSpec` (optional) :param job_spec: :class:`JobSpec` (optional) :param status: :class:`RegistryWebhookStatus` (optional) - This describes an enum + Enable or disable triggering the webhook, or put the webhook into test mode. The default is + `ACTIVE`: * `ACTIVE`: Webhook is triggered when an associated event happens. + + * `DISABLED`: Webhook is not triggered. + + * `TEST_MODE`: Webhook can be triggered through the test endpoint, but is not triggered on a real + event. 
""" diff --git a/databricks/sdk/service/oauth2.py b/databricks/sdk/service/oauth2.py index e070f6d2d..42f78e94c 100755 --- a/databricks/sdk/service/oauth2.py +++ b/databricks/sdk/service/oauth2.py @@ -212,6 +212,23 @@ def from_dict(cls, d: Dict[str, any]) -> 'GetPublishedAppIntegrationsOutput': return cls(apps=_repeated(d, 'apps', GetPublishedAppIntegrationOutput)) +@dataclass +class GetPublishedAppsOutput: + apps: Optional['List[PublishedAppOutput]'] = None + next_page_token: Optional[str] = None + + def as_dict(self) -> dict: + body = {} + if self.apps: body['apps'] = [v.as_dict() for v in self.apps] + if self.next_page_token is not None: body['next_page_token'] = self.next_page_token + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> 'GetPublishedAppsOutput': + return cls(apps=_repeated(d, 'apps', PublishedAppOutput), + next_page_token=d.get('next_page_token', None)) + + @dataclass class ListServicePrincipalSecretsResponse: secrets: Optional['List[SecretInfo]'] = None @@ -240,6 +257,39 @@ def from_dict(cls, d: Dict[str, any]) -> 'OAuthEnrollmentStatus': return cls(is_enabled=d.get('is_enabled', None)) +@dataclass +class PublishedAppOutput: + app_id: Optional[str] = None + client_id: Optional[str] = None + description: Optional[str] = None + is_confidential_client: Optional[bool] = None + name: Optional[str] = None + redirect_urls: Optional['List[str]'] = None + scopes: Optional['List[str]'] = None + + def as_dict(self) -> dict: + body = {} + if self.app_id is not None: body['app_id'] = self.app_id + if self.client_id is not None: body['client_id'] = self.client_id + if self.description is not None: body['description'] = self.description + if self.is_confidential_client is not None: + body['is_confidential_client'] = self.is_confidential_client + if self.name is not None: body['name'] = self.name + if self.redirect_urls: body['redirect_urls'] = [v for v in self.redirect_urls] + if self.scopes: body['scopes'] = [v for v in self.scopes] + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> 'PublishedAppOutput': + return cls(app_id=d.get('app_id', None), + client_id=d.get('client_id', None), + description=d.get('description', None), + is_confidential_client=d.get('is_confidential_client', None), + name=d.get('name', None), + redirect_urls=d.get('redirect_urls', None), + scopes=d.get('scopes', None)) + + @dataclass class SecretInfo: create_time: Optional[str] = None @@ -324,10 +374,7 @@ def from_dict(cls, d: Dict[str, any]) -> 'UpdatePublishedAppIntegration': class CustomAppIntegrationAPI: """These APIs enable administrators to manage custom oauth app integrations, which is required for - adding/using Custom OAuth App Integration like Tableau Cloud for Databricks in AWS cloud. - - **Note:** You can only add/use the OAuth custom application integrations when OAuth enrollment status is - enabled. For more details see :method:OAuthEnrollment/create""" + adding/using Custom OAuth App Integration like Tableau Cloud for Databricks in AWS cloud.""" def __init__(self, api_client): self._api = api_client @@ -505,12 +552,52 @@ def get(self) -> OAuthEnrollmentStatus: return OAuthEnrollmentStatus.from_dict(res) +class OAuthPublishedAppsAPI: + """These APIs enable administrators to view all the available published OAuth applications in Databricks. 
+ Administrators can add the published OAuth applications to their account through the OAuth Published App + Integration APIs.""" + + def __init__(self, api_client): + self._api = api_client + + def list(self, + *, + page_size: Optional[int] = None, + page_token: Optional[str] = None) -> Iterator[PublishedAppOutput]: + """Get all the published OAuth apps. + + Get all the available published OAuth apps in Databricks. + + :param page_size: int (optional) + The max number of OAuth published apps to return. + :param page_token: str (optional) + A token that can be used to get the next page of results. + + :returns: Iterator over :class:`PublishedAppOutput` + """ + + query = {} + if page_size is not None: query['page_size'] = page_size + if page_token is not None: query['page_token'] = page_token + headers = {'Accept': 'application/json', } + + while True: + json = self._api.do('GET', + f'/api/2.0/accounts/{self._api.account_id}/oauth2/published-apps/', + query=query, + headers=headers) + if 'apps' not in json or not json['apps']: + return + for v in json['apps']: + yield PublishedAppOutput.from_dict(v) + if 'next_page_token' not in json or not json['next_page_token']: + return + query['page_token'] = json['next_page_token'] + + class PublishedAppIntegrationAPI: """These APIs enable administrators to manage published oauth app integrations, which is required for - adding/using Published OAuth App Integration like Tableau Cloud for Databricks in AWS cloud. - - **Note:** You can only add/use the OAuth published application integrations when OAuth enrollment status - is enabled. For more details see :method:OAuthEnrollment/create""" + adding/using Published OAuth App Integration like Tableau Desktop for Databricks in AWS cloud.""" def __init__(self, api_client): self._api = api_client diff --git a/databricks/sdk/service/provisioning.py b/databricks/sdk/service/provisioning.py index 9b8e32794..726b23203 100755 --- a/databricks/sdk/service/provisioning.py +++ b/databricks/sdk/service/provisioning.py @@ -551,7 +551,9 @@ class GkeConfigConnectivityType(Enum): class KeyUseCase(Enum): - """This describes an enum""" + """Possible values are: * `MANAGED_SERVICES`: Encrypts notebook and secret data in the control + plane * `STORAGE`: Encrypts the workspace's root S3 bucket (root DBFS and system data) and, + optionally, cluster EBS volumes.""" MANAGED_SERVICES = 'MANAGED_SERVICES' STORAGE = 'STORAGE' @@ -896,7 +898,8 @@ def from_dict(cls, d: Dict[str, any]) -> 'VpcEndpoint': class VpcStatus(Enum): - """This describes an enum""" + """The status of this network configuration object in terms of its use in a workspace: * + `UNATTACHED`: Unattached. * `VALID`: Valid. * `BROKEN`: Broken. 
* `WARNED`: Warned.""" BROKEN = 'BROKEN' UNATTACHED = 'UNATTACHED' diff --git a/databricks/sdk/service/serving.py b/databricks/sdk/service/serving.py index 7731d834d..5ef02d86a 100755 --- a/databricks/sdk/service/serving.py +++ b/databricks/sdk/service/serving.py @@ -34,16 +34,20 @@ def from_dict(cls, d: Dict[str, any]) -> 'BuildLogsResponse': class CreateServingEndpoint: name: str config: 'EndpointCoreConfigInput' + tags: Optional['List[EndpointTag]'] = None def as_dict(self) -> dict: body = {} if self.config: body['config'] = self.config.as_dict() if self.name is not None: body['name'] = self.name + if self.tags: body['tags'] = [v.as_dict() for v in self.tags] return body @classmethod def from_dict(cls, d: Dict[str, any]) -> 'CreateServingEndpoint': - return cls(config=_from_dict(d, 'config', EndpointCoreConfigInput), name=d.get('name', None)) + return cls(config=_from_dict(d, 'config', EndpointCoreConfigInput), + name=d.get('name', None), + tags=_repeated(d, 'tags', EndpointTag)) @dataclass @@ -160,6 +164,22 @@ class EndpointStateReady(Enum): READY = 'READY' +@dataclass +class EndpointTag: + key: str + value: Optional[str] = None + + def as_dict(self) -> dict: + body = {} + if self.key is not None: body['key'] = self.key + if self.value is not None: body['value'] = self.value + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> 'EndpointTag': + return cls(key=d.get('key', None), value=d.get('value', None)) + + @dataclass class GetServingEndpointPermissionLevelsResponse: permission_levels: Optional['List[ServingEndpointPermissionsDescription]'] = None @@ -188,6 +208,26 @@ def from_dict(cls, d: Dict[str, any]) -> 'ListEndpointsResponse': return cls(endpoints=_repeated(d, 'endpoints', ServingEndpoint)) +@dataclass +class PatchServingEndpointTags: + add_tags: Optional['List[EndpointTag]'] = None + delete_tags: Optional['List[str]'] = None + name: Optional[str] = None + + def as_dict(self) -> dict: + body = {} + if self.add_tags: body['add_tags'] = [v.as_dict() for v in self.add_tags] + if self.delete_tags: body['delete_tags'] = [v for v in self.delete_tags] + if self.name is not None: body['name'] = self.name + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> 'PatchServingEndpointTags': + return cls(add_tags=_repeated(d, 'add_tags', EndpointTag), + delete_tags=d.get('delete_tags', None), + name=d.get('name', None)) + + @dataclass class QueryEndpointResponse: predictions: 'List[Any]' @@ -371,6 +411,7 @@ class ServingEndpoint: last_updated_timestamp: Optional[int] = None name: Optional[str] = None state: Optional['EndpointState'] = None + tags: Optional['List[EndpointTag]'] = None def as_dict(self) -> dict: body = {} @@ -382,6 +423,7 @@ def as_dict(self) -> dict: body['last_updated_timestamp'] = self.last_updated_timestamp if self.name is not None: body['name'] = self.name if self.state: body['state'] = self.state.as_dict() + if self.tags: body['tags'] = [v.as_dict() for v in self.tags] return body @classmethod @@ -392,7 +434,8 @@ def from_dict(cls, d: Dict[str, any]) -> 'ServingEndpoint': id=d.get('id', None), last_updated_timestamp=d.get('last_updated_timestamp', None), name=d.get('name', None), - state=_from_dict(d, 'state', EndpointState)) + state=_from_dict(d, 'state', EndpointState), + tags=_repeated(d, 'tags', EndpointTag)) @dataclass @@ -457,6 +500,7 @@ class ServingEndpointDetailed: pending_config: Optional['EndpointPendingConfig'] = None permission_level: Optional['ServingEndpointDetailedPermissionLevel'] = None state: 
Optional['EndpointState'] = None + tags: Optional['List[EndpointTag]'] = None def as_dict(self) -> dict: body = {} @@ -470,6 +514,7 @@ def as_dict(self) -> dict: if self.pending_config: body['pending_config'] = self.pending_config.as_dict() if self.permission_level is not None: body['permission_level'] = self.permission_level.value if self.state: body['state'] = self.state.as_dict() + if self.tags: body['tags'] = [v.as_dict() for v in self.tags] return body @classmethod @@ -482,7 +527,8 @@ def from_dict(cls, d: Dict[str, any]) -> 'ServingEndpointDetailed': name=d.get('name', None), pending_config=_from_dict(d, 'pending_config', EndpointPendingConfig), permission_level=_enum(d, 'permission_level', ServingEndpointDetailedPermissionLevel), - state=_from_dict(d, 'state', EndpointState)) + state=_from_dict(d, 'state', EndpointState), + tags=_repeated(d, 'tags', EndpointTag)) class ServingEndpointDetailedPermissionLevel(Enum): @@ -658,7 +704,11 @@ def build_logs(self, name: str, served_model_name: str) -> BuildLogsResponse: headers=headers) return BuildLogsResponse.from_dict(res) - def create(self, name: str, config: EndpointCoreConfigInput) -> Wait[ServingEndpointDetailed]: + def create(self, + name: str, + config: EndpointCoreConfigInput, + *, + tags: Optional[List[EndpointTag]] = None) -> Wait[ServingEndpointDetailed]: """Create a new serving endpoint. :param name: str @@ -666,6 +716,8 @@ def create(self, name: str, config: EndpointCoreConfigInput) -> Wait[ServingEndp workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. :param config: :class:`EndpointCoreConfigInput` The core config of the serving endpoint. + :param tags: List[:class:`EndpointTag`] (optional) + Tags to be attached to the serving endpoint and automatically propagated to billing logs. :returns: Long-running operation waiter for :class:`ServingEndpointDetailed`. @@ -674,6 +726,7 @@ def create(self, name: str, config: EndpointCoreConfigInput) -> Wait[ServingEndp body = {} if config is not None: body['config'] = config.as_dict() if name is not None: body['name'] = name + if tags is not None: body['tags'] = [v.as_dict() for v in tags] headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } op_response = self._api.do('POST', '/api/2.0/serving-endpoints', body=body, headers=headers) return Wait(self.wait_get_serving_endpoint_not_updating, @@ -681,9 +734,13 @@ def create(self, name: str, config: EndpointCoreConfigInput) -> Wait[ServingEndp name=op_response['name']) def create_and_wait( - self, name: str, config: EndpointCoreConfigInput, + self, + name: str, + config: EndpointCoreConfigInput, + *, + tags: Optional[List[EndpointTag]] = None, timeout=timedelta(minutes=20)) -> ServingEndpointDetailed: - return self.create(config=config, name=name).result(timeout=timeout) + return self.create(config=config, name=name, tags=tags).result(timeout=timeout) def delete(self, name: str): """Delete a serving endpoint. @@ -791,6 +848,31 @@ def logs(self, name: str, served_model_name: str) -> ServerLogsResponse: headers=headers) return ServerLogsResponse.from_dict(res) + def patch(self, + name: str, + *, + add_tags: Optional[List[EndpointTag]] = None, + delete_tags: Optional[List[str]] = None) -> Iterator['EndpointTag']: + """Patch the tags of a serving endpoint. + + Used to batch add and delete tags from a serving endpoint with a single API call. + + :param name: str + The name of the serving endpoint who's tags to patch. This field is required. 
+ :param add_tags: List[:class:`EndpointTag`] (optional) + List of endpoint tags to add + :param delete_tags: List[str] (optional) + List of tag keys to delete + + :returns: Iterator over :class:`EndpointTag` + """ + body = {} + if add_tags is not None: body['add_tags'] = [v.as_dict() for v in add_tags] + if delete_tags is not None: body['delete_tags'] = [v for v in delete_tags] + headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } + res = self._api.do('PATCH', f'/api/2.0/serving-endpoints/{name}/tags', body=body, headers=headers) + return [EndpointTag.from_dict(v) for v in res] + def query(self, name: str) -> QueryEndpointResponse: """Query a serving endpoint with provided model input. diff --git a/databricks/sdk/service/settings.py b/databricks/sdk/service/settings.py index d422618d0..3a650f43b 100755 --- a/databricks/sdk/service/settings.py +++ b/databricks/sdk/service/settings.py @@ -158,6 +158,66 @@ def from_dict(cls, d: Dict[str, any]) -> 'DeletePersonalComputeSettingResponse': return cls(etag=d.get('etag', None)) +@dataclass +class ExchangeToken: + credential: Optional[str] = None + credential_eol_time: Optional[int] = None + owner_id: Optional[int] = None + scopes: Optional['List[str]'] = None + token_type: Optional['TokenType'] = None + + def as_dict(self) -> dict: + body = {} + if self.credential is not None: body['credential'] = self.credential + if self.credential_eol_time is not None: body['credentialEolTime'] = self.credential_eol_time + if self.owner_id is not None: body['ownerId'] = self.owner_id + if self.scopes: body['scopes'] = [v for v in self.scopes] + if self.token_type is not None: body['tokenType'] = self.token_type.value + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> 'ExchangeToken': + return cls(credential=d.get('credential', None), + credential_eol_time=d.get('credentialEolTime', None), + owner_id=d.get('ownerId', None), + scopes=d.get('scopes', None), + token_type=_enum(d, 'tokenType', TokenType)) + + +@dataclass +class ExchangeTokenRequest: + partition_id: 'PartitionId' + token_type: 'List[TokenType]' + scopes: 'List[str]' + + def as_dict(self) -> dict: + body = {} + if self.partition_id: body['partitionId'] = self.partition_id.as_dict() + if self.scopes: body['scopes'] = [v for v in self.scopes] + if self.token_type: body['tokenType'] = [v.value for v in self.token_type] + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> 'ExchangeTokenRequest': + return cls(partition_id=_from_dict(d, 'partitionId', PartitionId), + scopes=d.get('scopes', None), + token_type=d.get('tokenType', None)) + + +@dataclass +class ExchangeTokenResponse: + values: Optional['List[ExchangeToken]'] = None + + def as_dict(self) -> dict: + body = {} + if self.values: body['values'] = [v.as_dict() for v in self.values] + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> 'ExchangeTokenResponse': + return cls(values=_repeated(d, 'values', ExchangeToken)) + + @dataclass class FetchIpAccessListResponse: ip_access_list: Optional['IpAccessListInfo'] = None @@ -270,12 +330,29 @@ def from_dict(cls, d: Dict[str, any]) -> 'ListTokensResponse': class ListType(Enum): - """This describes an enum""" + """Type of IP access list. Valid values are as follows and are case-sensitive: + + * `ALLOW`: An allow list. Include this IP or range. * `BLOCK`: A block list. Exclude this IP or + range. 
IP addresses in the block list are excluded even if they are included in an allow list.""" ALLOW = 'ALLOW' BLOCK = 'BLOCK' +@dataclass +class PartitionId: + workspace_id: Optional[int] = None + + def as_dict(self) -> dict: + body = {} + if self.workspace_id is not None: body['workspaceId'] = self.workspace_id + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> 'PartitionId': + return cls(workspace_id=d.get('workspaceId', None)) + + @dataclass class PersonalComputeMessage: value: 'PersonalComputeMessageEnum' @@ -549,6 +626,12 @@ def from_dict(cls, d: Dict[str, any]) -> 'TokenPermissionsRequest': return cls(access_control_list=_repeated(d, 'access_control_list', TokenAccessControlRequest)) +class TokenType(Enum): + """The type of token request. As of now, only `AZURE_ACTIVE_DIRECTORY_TOKEN` is supported.""" + + AZURE_ACTIVE_DIRECTORY_TOKEN = 'AZURE_ACTIVE_DIRECTORY_TOKEN' + + @dataclass class UpdateIpAccessList: label: str @@ -624,7 +707,10 @@ def create(self, label: str, list_type: ListType, ip_addresses: List[str]) -> Cr :param label: str Label for the IP access list. This **cannot** be empty. :param list_type: :class:`ListType` - This describes an enum + Type of IP access list. Valid values are as follows and are case-sensitive: + + * `ALLOW`: An allow list. Include this IP or range. * `BLOCK`: A block list. Exclude this IP or + range. IP addresses in the block list are excluded even if they are included in an allow list. :param ip_addresses: List[str] Array of IP addresses or CIDR values to be added to the IP access list. @@ -712,7 +798,10 @@ def replace(self, :param label: str Label for the IP access list. This **cannot** be empty. :param list_type: :class:`ListType` - This describes an enum + Type of IP access list. Valid values are as follows and are case-sensitive: + + * `ALLOW`: An allow list. Include this IP or range. * `BLOCK`: A block list. Exclude this IP or + range. IP addresses in the block list are excluded even if they are included in an allow list. :param ip_addresses: List[str] Array of IP addresses or CIDR values to be added to the IP access list. :param enabled: bool @@ -763,7 +852,10 @@ def update(self, :param label: str Label for the IP access list. This **cannot** be empty. :param list_type: :class:`ListType` - This describes an enum + Type of IP access list. Valid values are as follows and are case-sensitive: + + * `ALLOW`: An allow list. Include this IP or range. * `BLOCK`: A block list. Exclude this IP or + range. IP addresses in the block list are excluded even if they are included in an allow list. :param ip_addresses: List[str] Array of IP addresses or CIDR values to be added to the IP access list. :param enabled: bool @@ -965,6 +1057,39 @@ def update_personal_compute_setting( return PersonalComputeSetting.from_dict(res) +class CredentialsManagerAPI: + """Credentials manager interacts with with Identity Providers to to perform token exchanges using stored + credentials and refresh tokens.""" + + def __init__(self, api_client): + self._api = api_client + + def exchange_token(self, partition_id: PartitionId, token_type: List[TokenType], + scopes: List[str]) -> ExchangeTokenResponse: + """Exchange token. + + Exchange tokens with an Identity Provider to get a new access token. It allowes specifying scopes to + determine token permissions. + + :param partition_id: :class:`PartitionId` + :param token_type: List[:class:`TokenType`] + :param scopes: List[str] + Array of scopes for the token request. 
+ + :returns: :class:`ExchangeTokenResponse` + """ + body = {} + if partition_id is not None: body['partitionId'] = partition_id.as_dict() + if scopes is not None: body['scopes'] = [v for v in scopes] + if token_type is not None: body['tokenType'] = [v.value for v in token_type] + headers = {'Accept': 'application/json', 'Content-Type': 'application/json', } + res = self._api.do('POST', + '/api/2.0/credentials-manager/exchange-tokens/token', + body=body, + headers=headers) + return ExchangeTokenResponse.from_dict(res) + + class IpAccessListsAPI: """IP Access List enables admins to configure IP access lists. @@ -1008,7 +1133,10 @@ def create(self, label: str, list_type: ListType, ip_addresses: List[str]) -> Cr :param label: str Label for the IP access list. This **cannot** be empty. :param list_type: :class:`ListType` - This describes an enum + Type of IP access list. Valid values are as follows and are case-sensitive: + + * `ALLOW`: An allow list. Include this IP or range. * `BLOCK`: A block list. Exclude this IP or + range. IP addresses in the block list are excluded even if they are included in an allow list. :param ip_addresses: List[str] Array of IP addresses or CIDR values to be added to the IP access list. @@ -1087,7 +1215,10 @@ def replace(self, :param label: str Label for the IP access list. This **cannot** be empty. :param list_type: :class:`ListType` - This describes an enum + Type of IP access list. Valid values are as follows and are case-sensitive: + + * `ALLOW`: An allow list. Include this IP or range. * `BLOCK`: A block list. Exclude this IP or + range. IP addresses in the block list are excluded even if they are included in an allow list. :param ip_addresses: List[str] Array of IP addresses or CIDR values to be added to the IP access list. :param enabled: bool @@ -1136,7 +1267,10 @@ def update(self, :param label: str Label for the IP access list. This **cannot** be empty. :param list_type: :class:`ListType` - This describes an enum + Type of IP access list. Valid values are as follows and are case-sensitive: + + * `ALLOW`: An allow list. Include this IP or range. * `BLOCK`: A block list. Exclude this IP or + range. IP addresses in the block list are excluded even if they are included in an allow list. :param ip_addresses: List[str] Array of IP addresses or CIDR values to be added to the IP access list. 
:param enabled: bool diff --git a/databricks/sdk/service/sharing.py b/databricks/sdk/service/sharing.py index d6dd39b74..9a4adec11 100755 --- a/databricks/sdk/service/sharing.py +++ b/databricks/sdk/service/sharing.py @@ -558,6 +558,7 @@ class Privilege(Enum): CREATE_TABLE = 'CREATE_TABLE' CREATE_VIEW = 'CREATE_VIEW' EXECUTE = 'EXECUTE' + MANAGE_ALLOWLIST = 'MANAGE_ALLOWLIST' MODIFY = 'MODIFY' READ_FILES = 'READ_FILES' READ_PRIVATE_FILES = 'READ_PRIVATE_FILES' @@ -669,7 +670,7 @@ class RecipientInfo: comment: Optional[str] = None created_at: Optional[int] = None created_by: Optional[str] = None - data_recipient_global_metastore_id: Optional[Any] = None + data_recipient_global_metastore_id: Optional[str] = None ip_access_list: Optional['IpAccessList'] = None metastore_id: Optional[str] = None name: Optional[str] = None @@ -690,7 +691,7 @@ def as_dict(self) -> dict: if self.comment is not None: body['comment'] = self.comment if self.created_at is not None: body['created_at'] = self.created_at if self.created_by is not None: body['created_by'] = self.created_by - if self.data_recipient_global_metastore_id: + if self.data_recipient_global_metastore_id is not None: body['data_recipient_global_metastore_id'] = self.data_recipient_global_metastore_id if self.ip_access_list: body['ip_access_list'] = self.ip_access_list.as_dict() if self.metastore_id is not None: body['metastore_id'] = self.metastore_id diff --git a/databricks/sdk/service/sql.py b/databricks/sdk/service/sql.py index f5e568237..e44e22b03 100755 --- a/databricks/sdk/service/sql.py +++ b/databricks/sdk/service/sql.py @@ -89,6 +89,7 @@ class AlertOptions: value: Any custom_body: Optional[str] = None custom_subject: Optional[str] = None + empty_result_state: Optional['AlertOptionsEmptyResultState'] = None muted: Optional[bool] = None def as_dict(self) -> dict: @@ -96,6 +97,7 @@ def as_dict(self) -> dict: if self.column is not None: body['column'] = self.column if self.custom_body is not None: body['custom_body'] = self.custom_body if self.custom_subject is not None: body['custom_subject'] = self.custom_subject + if self.empty_result_state is not None: body['empty_result_state'] = self.empty_result_state.value if self.muted is not None: body['muted'] = self.muted if self.op is not None: body['op'] = self.op if self.value: body['value'] = self.value @@ -106,11 +108,20 @@ def from_dict(cls, d: Dict[str, any]) -> 'AlertOptions': return cls(column=d.get('column', None), custom_body=d.get('custom_body', None), custom_subject=d.get('custom_subject', None), + empty_result_state=_enum(d, 'empty_result_state', AlertOptionsEmptyResultState), muted=d.get('muted', None), op=d.get('op', None), value=d.get('value', None)) +class AlertOptionsEmptyResultState(Enum): + """State that alert evaluates to when query result is empty.""" + + OK = 'ok' + TRIGGERED = 'triggered' + UNKNOWN = 'unknown' + + @dataclass class AlertQuery: created_at: Optional[str] = None @@ -170,6 +181,32 @@ class AlertState(Enum): UNKNOWN = 'unknown' +@dataclass +class BaseChunkInfo: + """Describes metadata for a particular chunk, within a result set; this structure is used both + within a manifest, and when fetching individual chunk data or links.""" + + byte_count: Optional[int] = None + chunk_index: Optional[int] = None + row_count: Optional[int] = None + row_offset: Optional[int] = None + + def as_dict(self) -> dict: + body = {} + if self.byte_count is not None: body['byte_count'] = self.byte_count + if self.chunk_index is not None: body['chunk_index'] = self.chunk_index + if 
self.row_count is not None: body['row_count'] = self.row_count + if self.row_offset is not None: body['row_offset'] = self.row_offset + return body + + @classmethod + def from_dict(cls, d: Dict[str, any]) -> 'BaseChunkInfo': + return cls(byte_count=d.get('byte_count', None), + chunk_index=d.get('chunk_index', None), + row_count=d.get('row_count', None), + row_offset=d.get('row_offset', None)) + + @dataclass class Channel: dbsql_version: Optional[str] = None @@ -213,39 +250,6 @@ class ChannelName(Enum): CHANNEL_NAME_UNSPECIFIED = 'CHANNEL_NAME_UNSPECIFIED' -@dataclass -class ChunkInfo: - """Describes metadata for a particular chunk, within a result set; this structure is used both - within a manifest, and when fetching individual chunk data or links.""" - - byte_count: Optional[int] = None - chunk_index: Optional[int] = None - next_chunk_index: Optional[int] = None - next_chunk_internal_link: Optional[str] = None - row_count: Optional[int] = None - row_offset: Optional[int] = None - - def as_dict(self) -> dict: - body = {} - if self.byte_count is not None: body['byte_count'] = self.byte_count - if self.chunk_index is not None: body['chunk_index'] = self.chunk_index - if self.next_chunk_index is not None: body['next_chunk_index'] = self.next_chunk_index - if self.next_chunk_internal_link is not None: - body['next_chunk_internal_link'] = self.next_chunk_internal_link - if self.row_count is not None: body['row_count'] = self.row_count - if self.row_offset is not None: body['row_offset'] = self.row_offset - return body - - @classmethod - def from_dict(cls, d: Dict[str, any]) -> 'ChunkInfo': - return cls(byte_count=d.get('byte_count', None), - chunk_index=d.get('chunk_index', None), - next_chunk_index=d.get('next_chunk_index', None), - next_chunk_internal_link=d.get('next_chunk_internal_link', None), - row_count=d.get('row_count', None), - row_offset=d.get('row_offset', None)) - - @dataclass class ColumnInfo: name: Optional[str] = None @@ -279,7 +283,8 @@ def from_dict(cls, d: Dict[str, any]) -> 'ColumnInfo': class ColumnInfoTypeName(Enum): - """Name of type (INT, STRUCT, MAP, and so on)""" + """The name of the base data type. This doesn't include details for complex types such as STRUCT, + MAP or ARRAY.""" ARRAY = 'ARRAY' BINARY = 'BINARY' @@ -556,10 +561,10 @@ class Disposition(Enum): Statements executed with `INLINE` disposition will return result data inline, in `JSON_ARRAY` format, in a series of chunks. If a given statement produces a result set with a size larger - than 16 MiB, that statement execution is aborted, and no result set will be available. + than 25 MiB, that statement execution is aborted, and no result set will be available. **NOTE** Byte limits are computed based upon internal representations of the result set data, - and may not match the sizes visible in JSON responses. + and might not match the sizes visible in JSON responses. Statements executed with `EXTERNAL_LINKS` disposition will return result data as external links: URLs that point to cloud storage internal to the workspace. 
Using `EXTERNAL_LINKS` disposition @@ -823,17 +828,17 @@ def from_dict(cls, d: Dict[str, any]) -> 'EndpointTags': @dataclass class ExecuteStatementRequest: + statement: str + warehouse_id: str byte_limit: Optional[int] = None catalog: Optional[str] = None disposition: Optional['Disposition'] = None format: Optional['Format'] = None - on_wait_timeout: Optional['TimeoutAction'] = None + on_wait_timeout: Optional['ExecuteStatementRequestOnWaitTimeout'] = None parameters: Optional['List[StatementParameterListItem]'] = None row_limit: Optional[int] = None schema: Optional[str] = None - statement: Optional[str] = None wait_timeout: Optional[str] = None - warehouse_id: Optional[str] = None def as_dict(self) -> dict: body = {} @@ -856,7 +861,7 @@ def from_dict(cls, d: Dict[str, any]) -> 'ExecuteStatementRequest': catalog=d.get('catalog', None), disposition=_enum(d, 'disposition', Disposition), format=_enum(d, 'format', Format), - on_wait_timeout=_enum(d, 'on_wait_timeout', TimeoutAction), + on_wait_timeout=_enum(d, 'on_wait_timeout', ExecuteStatementRequestOnWaitTimeout), parameters=_repeated(d, 'parameters', StatementParameterListItem), row_limit=d.get('row_limit', None), schema=d.get('schema', None), @@ -865,6 +870,18 @@ def from_dict(cls, d: Dict[str, any]) -> 'ExecuteStatementRequest': warehouse_id=d.get('warehouse_id', None)) +class ExecuteStatementRequestOnWaitTimeout(Enum): + """When `wait_timeout > 0s`, the call will block up to the specified time. If the statement + execution doesn't finish within this time, `on_wait_timeout` determines whether the execution + should continue or be canceled. When set to `CONTINUE`, the statement execution continues + asynchronously and the call returns a statement ID which can be used for polling with + :method:statementexecution/getStatement. When set to `CANCEL`, the statement execution is + canceled and the call returns with a `CANCELED` state.""" + + CANCEL = 'CANCEL' + CONTINUE = 'CONTINUE' + + @dataclass class ExecuteStatementResponse: manifest: Optional['ResultManifest'] = None @@ -925,40 +942,6 @@ def from_dict(cls, d: Dict[str, any]) -> 'ExternalLink': class Format(Enum): - """Statement execution supports three result formats: `JSON_ARRAY` (default), `ARROW_STREAM`, and - `CSV`. - - When specifying `format=JSON_ARRAY`, result data will be formatted as an array of arrays of - values, where each value is either the *string representation* of a value, or `null`. For - example, the output of `SELECT concat('id-', id) AS strCol, id AS intCol, null AS nullCol FROM - range(3)` would look like this: - - ``` [ [ "id-1", "1", null ], [ "id-2", "2", null ], [ "id-3", "3", null ], ] ``` - - `JSON_ARRAY` is supported with `INLINE` and `EXTERNAL_LINKS` dispositions. - - `INLINE` `JSON_ARRAY` data can be found at the path `StatementResponse.result.data_array`. - - For `EXTERNAL_LINKS` `JSON_ARRAY` results, each URL points to a file in cloud storage that - contains compact JSON with no indentation or extra whitespace. - - When specifying `format=ARROW_STREAM`, each chunk in the result will be formatted as Apache - Arrow Stream. See the [Apache Arrow streaming format]. - - IMPORTANT: The format `ARROW_STREAM` is supported only with `EXTERNAL_LINKS` disposition. - - When specifying `format=CSV`, each chunk in the result will be a CSV according to [RFC 4180] - standard. All the columns values will have *string representation* similar to the `JSON_ARRAY` - format, and `null` values will be encoded as “null”. 
Only the first chunk in the result - would contain a header row with column names. For example, the output of `SELECT concat('id-', - id) AS strCol, id AS intCol, null as nullCol FROM range(3)` would look like this: - - ``` strCol,intCol,nullCol id-1,1,null id-2,2,null id-3,3,null ``` - - IMPORTANT: The format `CSV` is supported only with `EXTERNAL_LINKS` disposition. - - [Apache Arrow streaming format]: https://arrow.apache.org/docs/format/Columnar.html#ipc-streaming-format - [RFC 4180]: https://www.rfc-editor.org/rfc/rfc4180""" ARROW_STREAM = 'ARROW_STREAM' CSV = 'CSV' @@ -1301,7 +1284,8 @@ class ParameterType(Enum): class PermissionLevel(Enum): - """This describes an enum""" + """* `CAN_VIEW`: Can view the query * `CAN_RUN`: Can run the query * `CAN_MANAGE`: Can manage the + query""" CAN_MANAGE = 'CAN_MANAGE' CAN_RUN = 'CAN_RUN' @@ -1732,7 +1716,9 @@ class QueryStatementType(Enum): class QueryStatus(Enum): - """This describes an enum""" + """Query status with one the following values: * `QUEUED`: Query has been received and queued. * + `RUNNING`: Query has started. * `CANCELED`: Query has been cancelled by the user. * `FAILED`: + Query has failed. * `FINISHED`: Query has completed.""" CANCELED = 'CANCELED' FAILED = 'FAILED' @@ -1761,9 +1747,11 @@ def from_dict(cls, d: Dict[str, any]) -> 'RepeatedEndpointConfPairs': @dataclass class ResultData: - """Result data chunks are delivered in either the `chunk` field when using `INLINE` disposition, or - in the `external_link` field when using `EXTERNAL_LINKS` disposition. Exactly one of these will - be set.""" + """Contains the result data of a single chunk when using `INLINE` disposition. When using + `EXTERNAL_LINKS` disposition, the array `external_links` is used instead to provide presigned + URLs to the result data in cloud storage. Exactly one of these alternatives is used. (While the + `external_links` array prepares the API to return multiple links in a single response. 
Currently + only a single link is returned.)""" byte_count: Optional[int] = None chunk_index: Optional[int] = None @@ -1803,12 +1791,13 @@ def from_dict(cls, d: Dict[str, any]) -> 'ResultData': class ResultManifest: """The result manifest provides schema and metadata for the result set.""" - chunks: Optional['List[ChunkInfo]'] = None + chunks: Optional['List[BaseChunkInfo]'] = None format: Optional['Format'] = None schema: Optional['ResultSchema'] = None total_byte_count: Optional[int] = None total_chunk_count: Optional[int] = None total_row_count: Optional[int] = None + truncated: Optional[bool] = None def as_dict(self) -> dict: body = {} @@ -1818,21 +1807,23 @@ def as_dict(self) -> dict: if self.total_byte_count is not None: body['total_byte_count'] = self.total_byte_count if self.total_chunk_count is not None: body['total_chunk_count'] = self.total_chunk_count if self.total_row_count is not None: body['total_row_count'] = self.total_row_count + if self.truncated is not None: body['truncated'] = self.truncated return body @classmethod def from_dict(cls, d: Dict[str, any]) -> 'ResultManifest': - return cls(chunks=_repeated(d, 'chunks', ChunkInfo), + return cls(chunks=_repeated(d, 'chunks', BaseChunkInfo), format=_enum(d, 'format', Format), schema=_from_dict(d, 'schema', ResultSchema), total_byte_count=d.get('total_byte_count', None), total_chunk_count=d.get('total_chunk_count', None), - total_row_count=d.get('total_row_count', None)) + total_row_count=d.get('total_row_count', None), + truncated=d.get('truncated', None)) @dataclass class ResultSchema: - """Schema is an ordered list of column descriptions.""" + """The schema is an ordered list of column descriptions.""" column_count: Optional[int] = None columns: Optional['List[ColumnInfo]'] = None @@ -2016,7 +2007,7 @@ class StatementState(Enum): @dataclass class StatementStatus: - """Status response includes execution state and if relevant, error information.""" + """The status response includes execution state and if relevant, error information.""" error: Optional['ServiceError'] = None state: Optional['StatementState'] = None @@ -2189,20 +2180,6 @@ def from_dict(cls, d: Dict[str, any]) -> 'TimeRange': return cls(end_time_ms=d.get('end_time_ms', None), start_time_ms=d.get('start_time_ms', None)) -class TimeoutAction(Enum): - """When in synchronous mode with `wait_timeout > 0s` it determines the action taken when the - timeout is reached: - - `CONTINUE` → the statement execution continues asynchronously and the call returns a statement - ID immediately. - - `CANCEL` → the statement execution is canceled and the call returns immediately with a - `CANCELED` state.""" - - CANCEL = 'CANCEL' - CONTINUE = 'CONTINUE' - - @dataclass class TransferOwnershipObjectId: new_owner: Optional[str] = None @@ -3308,161 +3285,88 @@ def update(self, class StatementExecutionAPI: - """The SQL Statement Execution API manages the execution of arbitrary SQL statements and the fetching of - result data. - - **Release status** - - This feature is in [Public Preview]. + """The Databricks SQL Statement Execution API can be used to execute SQL statements on a SQL warehouse and + fetch the result. **Getting started** - We suggest beginning with the [SQL Statement Execution API tutorial]. + We suggest beginning with the [Databricks SQL Statement Execution API tutorial]. 
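As a quick orientation (an illustrative sketch, not part of the generated change), executing a short statement synchronously through the Python SDK could look roughly like this; the warehouse ID is a placeholder and the default `wait_timeout` of 10s is assumed:

```python
from databricks.sdk import WorkspaceClient

w = WorkspaceClient()

# With the default wait_timeout=10s the call blocks until the statement
# finishes or the timeout is reached; `statement` and `warehouse_id` are
# the two required arguments.
resp = w.statement_execution.execute_statement(
    statement="SELECT 1 AS value",
    warehouse_id="<warehouse-id>",  # placeholder: ID of a running SQL warehouse
)

print(resp.status.state)       # e.g. StatementState.SUCCEEDED
print(resp.result.data_array)  # inline JSON_ARRAY rows, e.g. [['1']]
```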
**Overview of statement execution and result fetching** Statement execution begins by issuing a :method:statementexecution/executeStatement request with a valid SQL statement and warehouse ID, along with optional parameters such as the data catalog and output format. + If no other parameters are specified, the server will wait for up to 10s before returning a response. If + the statement has completed within this timespan, the response will include the result data as a JSON + array and metadata. Otherwise, if no result is available after the 10s timeout expired, the response will + provide the statement ID that can be used to poll for results by using a + :method:statementexecution/getStatement request. - When submitting the statement, the call can behave synchronously or asynchronously, based on the - `wait_timeout` setting. When set between 5-50 seconds (default: 10) the call behaves synchronously and - waits for results up to the specified timeout; when set to `0s`, the call is asynchronous and responds - immediately with a statement ID that can be used to poll for status or fetch the results in a separate - call. + You can specify whether the call should behave synchronously, asynchronously or start synchronously with a + fallback to asynchronous execution. This is controlled with the `wait_timeout` and `on_wait_timeout` + settings. If `wait_timeout` is set between 5-50 seconds (default: 10s), the call waits for results up to + the specified timeout; when set to `0s`, the call is asynchronous and responds immediately with a + statement ID. The `on_wait_timeout` setting specifies what should happen when the timeout is reached while + the statement execution has not yet finished. This can be set to either `CONTINUE`, to fallback to + asynchronous mode, or it can be set to `CANCEL`, which cancels the statement. - **Call mode: synchronous** + In summary: - Synchronous mode - `wait_timeout=30s` and `on_wait_timeout=CANCEL` - The call waits up to 30 + seconds; if the statement execution finishes within this time, the result data is returned directly in the + response. If the execution takes longer than 30 seconds, the execution is canceled and the call returns + with a `CANCELED` state. - Asynchronous mode - `wait_timeout=0s` (`on_wait_timeout` is ignored) - The call + doesn't wait for the statement to finish but returns directly with a statement ID. The status of the + statement execution can be polled by issuing :method:statementexecution/getStatement with the statement + ID. Once the execution has succeeded, this call also returns the result and metadata in the response. - + Hybrid mode (default) - `wait_timeout=10s` and `on_wait_timeout=CONTINUE` - The call waits for up to 10 + seconds; if the statement execution finishes within this time, the result data is returned directly in the + response. If the execution takes longer than 10 seconds, a statement ID is returned. The statement ID can + be used to fetch status and results in the same way as in the asynchronous mode. - In synchronous mode, when statement execution completes within the `wait timeout`, the result data is - returned directly in the response. This response will contain `statement_id`, `status`, `manifest`, and - `result` fields. The `status` field confirms success whereas the `manifest` field contains the result data - column schema and metadata about the result set. The `result` field contains the first chunk of result - data according to the specified `disposition`, and links to fetch any remaining chunks. 
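To make the asynchronous mode summarized above concrete, here is a hedged sketch (not part of this diff) of submitting with `wait_timeout='0s'` and polling until a terminal state is reached; the warehouse ID is again a placeholder:

```python
import time

from databricks.sdk import WorkspaceClient
from databricks.sdk.service.sql import StatementState

w = WorkspaceClient()

# Asynchronous mode: the call returns immediately with a statement ID.
submitted = w.statement_execution.execute_statement(
    statement="SELECT id FROM range(10)",
    warehouse_id="<warehouse-id>",  # placeholder
    wait_timeout="0s",
)

terminal = {StatementState.SUCCEEDED, StatementState.FAILED,
            StatementState.CANCELED, StatementState.CLOSED}

# Poll getStatement until the execution reaches a terminal state.
status = w.statement_execution.get_statement(submitted.statement_id)
while status.status.state not in terminal:
    time.sleep(5)
    status = w.statement_execution.get_statement(submitted.statement_id)

print(status.status.state)
if status.status.state == StatementState.SUCCEEDED:
    print(status.result.data_array)
```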
+ Depending on the size, the result can be split into multiple chunks. If the statement execution is + successful, the statement response contains a manifest and the first chunk of the result. The manifest + contains schema information and provides metadata for each chunk in the result. Result chunks can be + retrieved by index with :method:statementexecution/getStatementResultChunkN which may be called in any + order and in parallel. For sequential fetching, each chunk, apart from the last, also contains a + `next_chunk_index` and `next_chunk_internal_link` that point to the next chunk. - If the execution does not complete before `wait_timeout`, the setting `on_wait_timeout` determines how the - system responds. - - By default, `on_wait_timeout=CONTINUE`, and after reaching `wait_timeout`, a response is returned and - statement execution continues asynchronously. The response will contain only `statement_id` and `status` - fields, and the caller must now follow the flow described for asynchronous call mode to poll and fetch the - result. - - Alternatively, `on_wait_timeout` can also be set to `CANCEL`; in this case if the timeout is reached - before execution completes, the underlying statement execution is canceled, and a `CANCELED` status is - returned in the response. - - **Call mode: asynchronous** - - In asynchronous mode, or after a timed-out synchronous request continues, a `statement_id` and `status` - will be returned. In this case polling :method:statementexecution/getStatement calls are required to fetch - the result and metadata. - - Next, a caller must poll until execution completes (`SUCCEEDED`, `FAILED`, etc.) by issuing - :method:statementexecution/getStatement requests for the given `statement_id`. - - When execution has succeeded, the response will contain `status`, `manifest`, and `result` fields. These - fields and the structure are identical to those in the response to a successful synchronous submission. - The `result` field will contain the first chunk of result data, either `INLINE` or as `EXTERNAL_LINKS` - depending on `disposition`. Additional chunks of result data can be fetched by checking for the presence - of the `next_chunk_internal_link` field, and iteratively `GET` those paths until that field is unset: `GET - https://$DATABRICKS_HOST/{next_chunk_internal_link}`. + A statement can be canceled with :method:statementexecution/cancelExecution. **Fetching result data: format and disposition** - To specify the result data format, set the `format` field to `JSON_ARRAY` (JSON), `ARROW_STREAM` ([Apache - Arrow Columnar]), or `CSV`. - - You can also configure how to fetch the result data in two different modes by setting the `disposition` - field to `INLINE` or `EXTERNAL_LINKS`. + To specify the format of the result data, use the `format` field, which can be set to one of the following + options: `JSON_ARRAY` (JSON), `ARROW_STREAM` ([Apache Arrow Columnar]), or `CSV`. - The `INLINE` disposition can only be used with the `JSON_ARRAY` format and allows results up to 16 MiB. - When a statement executed with `INLINE` disposition exceeds this limit, the execution is aborted, and no - result can be fetched. + There are two ways to receive statement results, controlled by the `disposition` setting, which can be + either `INLINE` or `EXTERNAL_LINKS`: - The `EXTERNAL_LINKS` disposition allows fetching large result sets in `JSON_ARRAY`, `ARROW_STREAM` and - `CSV` formats, and with higher throughput. 
+ - `INLINE`: In this mode, the result data is directly included in the response. It's best suited for + smaller results. This mode can only be used with the `JSON_ARRAY` format. - The API uses defaults of `format=JSON_ARRAY` and `disposition=INLINE`. Databricks recommends that you - explicit setting the format and the disposition for all production use cases. + - `EXTERNAL_LINKS`: In this mode, the response provides links that can be used to download the result data + in chunks separately. This approach is ideal for larger results and offers higher throughput. This mode + can be used with all the formats: `JSON_ARRAY`, `ARROW_STREAM`, and `CSV`. - **Statement response: statement_id, status, manifest, and result** - - The base call :method:statementexecution/getStatement returns a single response combining `statement_id`, - `status`, a result `manifest`, and a `result` data chunk or link, depending on the `disposition`. The - `manifest` contains the result schema definition and the result summary metadata. When using - `disposition=EXTERNAL_LINKS`, it also contains a full listing of all chunks and their summary metadata. - - **Use case: small result sets with INLINE + JSON_ARRAY** - - For flows that generate small and predictable result sets (<= 16 MiB), `INLINE` downloads of `JSON_ARRAY` - result data are typically the simplest way to execute and fetch result data. - - When the result set with `disposition=INLINE` is larger, the result can be transferred in chunks. After - receiving the initial chunk with :method:statementexecution/executeStatement or - :method:statementexecution/getStatement subsequent calls are required to iteratively fetch each chunk. - Each result response contains a link to the next chunk, when there are additional chunks to fetch; it can - be found in the field `.next_chunk_internal_link`. This link is an absolute `path` to be joined with your - `$DATABRICKS_HOST`, and of the form `/api/2.0/sql/statements/{statement_id}/result/chunks/{chunk_index}`. - The next chunk can be fetched by issuing a :method:statementexecution/getStatementResultChunkN request. - - When using this mode, each chunk may be fetched once, and in order. A chunk without a field - `next_chunk_internal_link` indicates the last chunk was reached and all chunks have been fetched from the - result set. - - **Use case: large result sets with EXTERNAL_LINKS + ARROW_STREAM** - - Using `EXTERNAL_LINKS` to fetch result data in Arrow format allows you to fetch large result sets - efficiently. The primary difference from using `INLINE` disposition is that fetched result chunks contain - resolved `external_links` URLs, which can be fetched with standard HTTP. - - **Presigned URLs** - - External links point to data stored within your workspace's internal DBFS, in the form of a presigned URL. - The URLs are valid for only a short period, <= 15 minutes. Alongside each `external_link` is an expiration - field indicating the time at which the URL is no longer valid. In `EXTERNAL_LINKS` mode, chunks can be - resolved and fetched multiple times and in parallel. - - ---- - - ### **Warning: We recommend you protect the URLs in the EXTERNAL_LINKS.** - - When using the EXTERNAL_LINKS disposition, a short-lived pre-signed URL is generated, which the client can - use to download the result chunk directly from cloud storage. As the short-lived credential is embedded in - a pre-signed URL, this URL should be protected. 
- - Since pre-signed URLs are generated with embedded temporary credentials, you need to remove the - authorization header from the fetch requests. - - ---- - - Similar to `INLINE` mode, callers can iterate through the result set, by using the - `next_chunk_internal_link` field. Each internal link response will contain an external link to the raw - chunk data, and additionally contain the `next_chunk_internal_link` if there are more chunks. - - Unlike `INLINE` mode, when using `EXTERNAL_LINKS`, chunks may be fetched out of order, and in parallel to - achieve higher throughput. + By default, the API uses `format=JSON_ARRAY` and `disposition=INLINE`. **Limits and limitations** - Note: All byte limits are calculated based on internal storage metrics and will not match byte counts of - actual payloads. + Note: The byte limit for INLINE disposition is based on internal storage metrics and will not exactly + match the byte count of the actual payload. - - Statements with `disposition=INLINE` are limited to 16 MiB and will abort when this limit is exceeded. - - Statements with `disposition=EXTERNAL_LINKS` are limited to 100 GiB. - The maximum query text size is 16 - MiB. - Cancelation may silently fail. A successful response from a cancel request indicates that the - cancel request was successfully received and sent to the processing engine. However, for example, an - outstanding statement may complete execution during signal delivery, with the cancel signal arriving too - late to be meaningful. Polling for status until a terminal state is reached is a reliable way to determine - the final state. - Wait timeouts are approximate, occur server-side, and cannot account for caller delays, - network latency from caller to service, and similarly. - After a statement has been submitted and a - statement_id is returned, that statement's status and result will automatically close after either of 2 - conditions: - The last result chunk is fetched (or resolved to an external link). - One hour passes with - no calls to get the status or fetch the result. Best practice: in asynchronous clients, poll for status - regularly (and with backoff) to keep the statement open and alive. - After fetching the last result chunk - (including chunk_index=0) the statement is automatically closed. + - Statements with `disposition=INLINE` are limited to 25 MiB and will fail when this limit is exceeded. - + Statements with `disposition=EXTERNAL_LINKS` are limited to 100 GiB. Result sets larger than this limit + will be truncated. Truncation is indicated by the `truncated` field in the result manifest. - The maximum + query text size is 16 MiB. - Cancelation might silently fail. A successful response from a cancel request + indicates that the cancel request was successfully received and sent to the processing engine. However, an + outstanding statement might have already completed execution when the cancel request arrives. Polling for + status until a terminal state is reached is a reliable way to determine the final state. - Wait timeouts + are approximate, occur server-side, and cannot account for things such as caller delays and network + latency from caller to service. - The system will auto-close a statement after one hour if the client + stops polling and thus you must poll at least once an hour. - The results are only available for one hour + after success; polling does not extend this. 
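For larger results, a rough, non-authoritative sketch of the `EXTERNAL_LINKS` flow described above might look as follows; the warehouse ID is a placeholder, the third-party `requests` package is assumed to be installed, and the sketch assumes the statement finishes within the default 10 second wait:

```python
import requests

from databricks.sdk import WorkspaceClient
from databricks.sdk.service.sql import Disposition, Format

w = WorkspaceClient()

resp = w.statement_execution.execute_statement(
    statement="SELECT id FROM range(1000000)",
    warehouse_id="<warehouse-id>",        # placeholder
    disposition=Disposition.EXTERNAL_LINKS,
    format=Format.JSON_ARRAY,
    byte_limit=1 * 1024 * 1024 * 1024,    # optional cap; `truncated` is set if exceeded
)

if resp.manifest.truncated:
    print("result set was truncated")

# Chunks can be fetched by index, in any order and in parallel.
for chunk in resp.manifest.chunks or []:
    data = w.statement_execution.get_statement_result_chunk_n(
        resp.statement_id, chunk.chunk_index)
    for link in data.external_links or []:
        # Presigned cloud-storage URL: fetch it without attaching the
        # Databricks Authorization header.
        rows = requests.get(link.external_link).json()
        print(chunk.chunk_index, len(rows))
```

Because the presigned links expire after a short period, each chunk should be downloaded promptly after it is resolved.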
[Apache Arrow Columnar]: https://arrow.apache.org/overview/ - [Public Preview]: https://docs.databricks.com/release-notes/release-types.html - [SQL Statement Execution API tutorial]: https://docs.databricks.com/sql/api/sql-execution-tutorial.html""" + [Databricks SQL Statement Execution API tutorial]: https://docs.databricks.com/sql/api/sql-execution-tutorial.html""" def __init__(self, api_client): self._api = api_client @@ -3482,25 +3386,31 @@ def cancel_execution(self, statement_id: str): self._api.do('POST', f'/api/2.0/sql/statements/{statement_id}/cancel', headers=headers) def execute_statement(self, + statement: str, + warehouse_id: str, *, byte_limit: Optional[int] = None, catalog: Optional[str] = None, disposition: Optional[Disposition] = None, format: Optional[Format] = None, - on_wait_timeout: Optional[TimeoutAction] = None, + on_wait_timeout: Optional[ExecuteStatementRequestOnWaitTimeout] = None, parameters: Optional[List[StatementParameterListItem]] = None, row_limit: Optional[int] = None, schema: Optional[str] = None, - statement: Optional[str] = None, - wait_timeout: Optional[str] = None, - warehouse_id: Optional[str] = None) -> ExecuteStatementResponse: + wait_timeout: Optional[str] = None) -> ExecuteStatementResponse: """Execute a SQL statement. - Execute a SQL statement, and if flagged as such, await its result for a specified time. - + :param statement: str + The SQL statement to execute. The statement can optionally be parameterized, see `parameters`. + :param warehouse_id: str + Warehouse upon which to execute a statement. See also [What are SQL + warehouses?](/sql/admin/warehouse-type.html) :param byte_limit: int (optional) - Applies the given byte limit to the statement's result size. Byte counts are based on internal - representations and may not match measurable sizes in the requested `format`. + Applies the given byte limit to the statement's result size. Byte counts are based on internal data + representations and might not match the final size in the requested `format`. If the result was + truncated due to the byte limit, then `truncated` in the response is set to `true`. When using + `EXTERNAL_LINKS` disposition, a default `byte_limit` of 100 GiB is applied if `byte_limit` is not + explcitly set. :param catalog: str (optional) Sets default catalog for statement execution, similar to [`USE CATALOG`] in SQL. @@ -3509,11 +3419,11 @@ def execute_statement(self, The fetch disposition provides two modes of fetching results: `INLINE` and `EXTERNAL_LINKS`. Statements executed with `INLINE` disposition will return result data inline, in `JSON_ARRAY` - format, in a series of chunks. If a given statement produces a result set with a size larger than 16 + format, in a series of chunks. If a given statement produces a result set with a size larger than 25 MiB, that statement execution is aborted, and no result set will be available. **NOTE** Byte limits are computed based upon internal representations of the result set data, and - may not match the sizes visible in JSON responses. + might not match the sizes visible in JSON responses. Statements executed with `EXTERNAL_LINKS` disposition will return result data as external links: URLs that point to cloud storage internal to the workspace. Using `EXTERNAL_LINKS` disposition @@ -3530,6 +3440,9 @@ def execute_statement(self, Statement execution supports three result formats: `JSON_ARRAY` (default), `ARROW_STREAM`, and `CSV`. + Important: The formats `ARROW_STREAM` and `CSV` are supported only with `EXTERNAL_LINKS` + disposition. 
`JSON_ARRAY` is supported in `INLINE` and `EXTERNAL_LINKS` disposition. + When specifying `format=JSON_ARRAY`, result data will be formatted as an array of arrays of values, where each value is either the *string representation* of a value, or `null`. For example, the output of `SELECT concat('id-', id) AS strCol, id AS intCol, null AS nullCol FROM range(3)` would @@ -3537,50 +3450,41 @@ def execute_statement(self, ``` [ [ "id-1", "1", null ], [ "id-2", "2", null ], [ "id-3", "3", null ], ] ``` - `JSON_ARRAY` is supported with `INLINE` and `EXTERNAL_LINKS` dispositions. - - `INLINE` `JSON_ARRAY` data can be found at the path `StatementResponse.result.data_array`. - - For `EXTERNAL_LINKS` `JSON_ARRAY` results, each URL points to a file in cloud storage that contains - compact JSON with no indentation or extra whitespace. - - When specifying `format=ARROW_STREAM`, each chunk in the result will be formatted as Apache Arrow - Stream. See the [Apache Arrow streaming format]. + When specifying `format=JSON_ARRAY` and `disposition=EXTERNAL_LINKS`, each chunk in the result + contains compact JSON with no indentation or extra whitespace. - IMPORTANT: The format `ARROW_STREAM` is supported only with `EXTERNAL_LINKS` disposition. + When specifying `format=ARROW_STREAM` and `disposition=EXTERNAL_LINKS`, each chunk in the result + will be formatted as Apache Arrow Stream. See the [Apache Arrow streaming format]. - When specifying `format=CSV`, each chunk in the result will be a CSV according to [RFC 4180] - standard. All the columns values will have *string representation* similar to the `JSON_ARRAY` - format, and `null` values will be encoded as “null”. Only the first chunk in the result would - contain a header row with column names. For example, the output of `SELECT concat('id-', id) AS - strCol, id AS intCol, null as nullCol FROM range(3)` would look like this: + When specifying `format=CSV` and `disposition=EXTERNAL_LINKS`, each chunk in the result will be a + CSV according to [RFC 4180] standard. All the columns values will have *string representation* + similar to the `JSON_ARRAY` format, and `null` values will be encoded as “null”. Only the first + chunk in the result would contain a header row with column names. For example, the output of `SELECT + concat('id-', id) AS strCol, id AS intCol, null as nullCol FROM range(3)` would look like this: ``` strCol,intCol,nullCol id-1,1,null id-2,2,null id-3,3,null ``` - IMPORTANT: The format `CSV` is supported only with `EXTERNAL_LINKS` disposition. - [Apache Arrow streaming format]: https://arrow.apache.org/docs/format/Columnar.html#ipc-streaming-format [RFC 4180]: https://www.rfc-editor.org/rfc/rfc4180 - :param on_wait_timeout: :class:`TimeoutAction` (optional) - When in synchronous mode with `wait_timeout > 0s` it determines the action taken when the timeout is - reached: - - `CONTINUE` → the statement execution continues asynchronously and the call returns a statement ID - immediately. - - `CANCEL` → the statement execution is canceled and the call returns immediately with a `CANCELED` - state. + :param on_wait_timeout: :class:`ExecuteStatementRequestOnWaitTimeout` (optional) + When `wait_timeout > 0s`, the call will block up to the specified time. If the statement execution + doesn't finish within this time, `on_wait_timeout` determines whether the execution should continue + or be canceled. 
When set to `CONTINUE`, the statement execution continues asynchronously and the + call returns a statement ID which can be used for polling with + :method:statementexecution/getStatement. When set to `CANCEL`, the statement execution is canceled + and the call returns with a `CANCELED` state. :param parameters: List[:class:`StatementParameterListItem`] (optional) A list of parameters to pass into a SQL statement containing parameter markers. A parameter consists of a name, a value, and optionally a type. To represent a NULL value, the `value` field may be - omitted. If the `type` field is omitted, the value is interpreted as a string. + omitted or set to `null` explicitly. If the `type` field is omitted, the value is interpreted as a + string. If the type is given, parameters will be checked for type correctness according to the given type. A value is correct if the provided string can be converted to the requested type using the `cast` function. The exact semantics are described in the section [`cast` function] of the SQL language reference. - For example, the following statement contains two parameters, `my_id` and `my_date`: + For example, the following statement contains two parameters, `my_name` and `my_date`: SELECT * FROM my_table WHERE name = :my_name AND date = :my_date @@ -3590,29 +3494,34 @@ def execute_statement(self, "parameters": [ { "name": "my_name", "value": "the name" }, { "name": "my_date", "value": "2020-01-01", "type": "DATE" } ] } - Currently, positional parameters denoted by a `?` marker are not supported by the SQL Statement - Execution API. + Currently, positional parameters denoted by a `?` marker are not supported by the Databricks SQL + Statement Execution API. Also see the section [Parameter markers] of the SQL language reference. [Parameter markers]: https://docs.databricks.com/sql/language-manual/sql-ref-parameter-marker.html [`cast` function]: https://docs.databricks.com/sql/language-manual/functions/cast.html :param row_limit: int (optional) - Applies the given row limit to the statement's result set with identical semantics as the SQL - `LIMIT` clause. + Applies the given row limit to the statement's result set, but unlike the `LIMIT` clause in SQL, it + also sets the `truncated` field in the response to indicate whether the result was trimmed due to + the limit or not. :param schema: str (optional) Sets default schema for statement execution, similar to [`USE SCHEMA`] in SQL. [`USE SCHEMA`]: https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-use-schema.html - :param statement: str (optional) - SQL statement to execute :param wait_timeout: str (optional) - The time in seconds the API service will wait for the statement's result set as `Ns`, where `N` can - be set to 0 or to a value between 5 and 50. When set to '0s' the statement will execute in - asynchronous mode. - :param warehouse_id: str (optional) - Warehouse upon which to execute a statement. See also [What are SQL - warehouses?](/sql/admin/warehouse-type.html) + The time in seconds the call will wait for the statement's result set as `Ns`, where `N` can be set + to 0 or to a value between 5 and 50. + + When set to `0s`, the statement will execute in asynchronous mode and the call will not wait for the + execution to finish. In this case, the call returns directly with `PENDING` state and a statement ID + which can be used for polling with :method:statementexecution/getStatement. 
+ + When set between 5 and 50 seconds, the call will behave synchronously up to this timeout and wait + for the statement execution to finish. If the execution finishes within this time, the call returns + immediately with a manifest and result data (or a `FAILED` state in case of an execution error). If + the statement takes longer to execute, `on_wait_timeout` determines what should happen after the + timeout is reached. :returns: :class:`ExecuteStatementResponse` """ @@ -3641,7 +3550,7 @@ def get_statement(self, statement_id: str) -> GetStatementResponse: state set. After at least 12 hours in terminal state, the statement is removed from the warehouse and further calls will receive an HTTP 404 response. - **NOTE** This call currently may take up to 5 seconds to get the latest status and result. + **NOTE** This call currently might take up to 5 seconds to get the latest status and result. :param statement_id: str @@ -3655,11 +3564,12 @@ def get_statement(self, statement_id: str) -> GetStatementResponse: def get_statement_result_chunk_n(self, statement_id: str, chunk_index: int) -> ResultData: """Get result chunk by index. - After the statement execution has `SUCCEEDED`, the result data can be fetched by chunks. Whereas the - first chuck with `chunk_index=0` is typically fetched through a `get status` request, subsequent - chunks can be fetched using a `get result` request. The response structure is identical to the nested - `result` element described in the `get status` request, and similarly includes the `next_chunk_index` - and `next_chunk_internal_link` fields for simple iteration through the result set. + After the statement execution has `SUCCEEDED`, this request can be used to fetch any chunk by index. + Whereas the first chunk with `chunk_index=0` is typically fetched with + :method:statementexecution/executeStatement or :method:statementexecution/getStatement, this request + can be used to fetch subsequent chunks. The response structure is identical to the nested `result` + element described in the :method:statementexecution/getStatement request, and similarly includes the + `next_chunk_index` and `next_chunk_internal_link` fields for simple iteration through the result set. :param statement_id: str :param chunk_index: int diff --git a/docs/account/account-oauth2.rst b/docs/account/account-oauth2.rst index f504ce4c0..be43686fb 100644 --- a/docs/account/account-oauth2.rst +++ b/docs/account/account-oauth2.rst @@ -9,5 +9,6 @@ Configure OAuth 2.0 application registrations for Databricks custom_app_integration o_auth_enrollment + o_auth_published_apps published_app_integration service_principal_secrets \ No newline at end of file diff --git a/docs/account/custom_app_integration.rst b/docs/account/custom_app_integration.rst index 6a811a77f..19e2c39ca 100644 --- a/docs/account/custom_app_integration.rst +++ b/docs/account/custom_app_integration.rst @@ -4,9 +4,6 @@ OAuth Custom App Integration These APIs enable administrators to manage custom oauth app integrations, which is required for adding/using Custom OAuth App Integration like Tableau Cloud for Databricks in AWS cloud. - - **Note:** You can only add/use the OAuth custom application integrations when OAuth enrollment status is - enabled. For more details see :method:OAuthEnrollment/create .. 
py:method:: create(name, redirect_urls [, confidential, scopes, token_access_policy]) diff --git a/docs/account/ip_access_lists.rst b/docs/account/ip_access_lists.rst index 370a25c19..f49b6363a 100644 --- a/docs/account/ip_access_lists.rst +++ b/docs/account/ip_access_lists.rst @@ -60,7 +60,10 @@ Account IP Access Lists :param label: str Label for the IP access list. This **cannot** be empty. :param list_type: :class:`ListType` - This describes an enum + Type of IP access list. Valid values are as follows and are case-sensitive: + + * `ALLOW`: An allow list. Include this IP or range. * `BLOCK`: A block list. Exclude this IP or + range. IP addresses in the block list are excluded even if they are included in an allow list. :param ip_addresses: List[str] Array of IP addresses or CIDR values to be added to the IP access list. @@ -171,7 +174,10 @@ Account IP Access Lists :param label: str Label for the IP access list. This **cannot** be empty. :param list_type: :class:`ListType` - This describes an enum + Type of IP access list. Valid values are as follows and are case-sensitive: + + * `ALLOW`: An allow list. Include this IP or range. * `BLOCK`: A block list. Exclude this IP or + range. IP addresses in the block list are excluded even if they are included in an allow list. :param ip_addresses: List[str] Array of IP addresses or CIDR values to be added to the IP access list. :param enabled: bool @@ -205,7 +211,10 @@ Account IP Access Lists :param label: str Label for the IP access list. This **cannot** be empty. :param list_type: :class:`ListType` - This describes an enum + Type of IP access list. Valid values are as follows and are case-sensitive: + + * `ALLOW`: An allow list. Include this IP or range. * `BLOCK`: A block list. Exclude this IP or + range. IP addresses in the block list are excluded even if they are included in an allow list. :param ip_addresses: List[str] Array of IP addresses or CIDR values to be added to the IP access list. :param enabled: bool diff --git a/docs/account/metastore_assignments.rst b/docs/account/metastore_assignments.rst index 574fd2254..3f0902a27 100644 --- a/docs/account/metastore_assignments.rst +++ b/docs/account/metastore_assignments.rst @@ -56,7 +56,7 @@ Account Metastore Assignments :param metastore_id: str Unity Catalog metastore ID - :returns: Iterator over :class:`MetastoreAssignment` + :returns: Iterator over int .. py:method:: update(workspace_id, metastore_id [, metastore_assignment]) diff --git a/docs/account/o_auth_published_apps.rst b/docs/account/o_auth_published_apps.rst new file mode 100644 index 000000000..61458e899 --- /dev/null +++ b/docs/account/o_auth_published_apps.rst @@ -0,0 +1,21 @@ +OAuth Published App +=================== +.. py:class:: OAuthPublishedAppsAPI + + These APIs enable administrators to view all the available published OAuth applications in Databricks. + Administrators can add the published OAuth applications to their account through the OAuth Published App + Integration APIs. + + .. py:method:: list( [, page_size, page_token]) + + Get all the published OAuth apps. + + Get all the available published OAuth apps in Databricks. + + :param page_size: int (optional) + The max number of OAuth published apps to return. + :param page_token: str (optional) + A token that can be used to get the next page of results. 
+ + :returns: Iterator over :class:`PublishedAppOutput` + \ No newline at end of file diff --git a/docs/account/published_app_integration.rst b/docs/account/published_app_integration.rst index abcf3a38b..689597da7 100644 --- a/docs/account/published_app_integration.rst +++ b/docs/account/published_app_integration.rst @@ -3,10 +3,7 @@ OAuth Published App Integration .. py:class:: PublishedAppIntegrationAPI These APIs enable administrators to manage published oauth app integrations, which is required for - adding/using Published OAuth App Integration like Tableau Cloud for Databricks in AWS cloud. - - **Note:** You can only add/use the OAuth published application integrations when OAuth enrollment status - is enabled. For more details see :method:OAuthEnrollment/create + adding/using Published OAuth App Integration like Tableau Desktop for Databricks in AWS cloud. .. py:method:: create( [, app_id, token_access_policy]) diff --git a/docs/workspace/artifact_allowlists.rst b/docs/workspace/artifact_allowlists.rst index db57a4bf9..c96b9d8f6 100644 --- a/docs/workspace/artifact_allowlists.rst +++ b/docs/workspace/artifact_allowlists.rst @@ -9,7 +9,8 @@ Artifact Allowlists Get an artifact allowlist. - Get the artifact allowlist of a certain artifact type. The caller must be a metastore admin. + Get the artifact allowlist of a certain artifact type. The caller must be a metastore admin or have + the **MANAGE ALLOWLIST** privilege on the metastore. :param artifact_type: :class:`ArtifactType` The artifact type of the allowlist. @@ -22,9 +23,11 @@ Artifact Allowlists Set an artifact allowlist. Set the artifact allowlist of a certain artifact type. The whole artifact allowlist is replaced with - the new allowlist. The caller must be a metastore admin. + the new allowlist. The caller must be a metastore admin or have the **MANAGE ALLOWLIST** privilege on + the metastore. - :param artifact_matchers: :class:`ArtifactMatcher` + :param artifact_matchers: List[:class:`ArtifactMatcher`] + A list of allowed artifact match patterns. :param artifact_type: :class:`ArtifactType` The artifact type of the allowlist. diff --git a/docs/workspace/clusters.rst b/docs/workspace/clusters.rst index 6b70a6b53..bcd1886f5 100644 --- a/docs/workspace/clusters.rst +++ b/docs/workspace/clusters.rst @@ -140,7 +140,18 @@ Clusters - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags :param data_security_mode: :class:`DataSecurityMode` (optional) - This describes an enum + Data security mode decides what data governance model to use when accessing data from a cluster. + + * `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are + not available in this mode. * `SINGLE_USER`: A secure cluster that can only be exclusively used by a + single user specified in `single_user_name`. Most programming languages, cluster features and data + governance features are available in this mode. * `USER_ISOLATION`: A secure cluster that can be + shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data + and credentials. Most data governance features are supported in this mode. But programming languages + and cluster features might be limited. * `LEGACY_TABLE_ACL`: This mode is for users migrating from + legacy Table ACL clusters. * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy + Passthrough on high concurrency clusters. 
* `LEGACY_SINGLE_USER`: This mode is for users migrating
+      from legacy Passthrough on standard clusters.
     :param docker_image: :class:`DockerImage` (optional)
     :param driver_instance_pool_id: str (optional)
       The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses
@@ -340,7 +351,18 @@ Clusters
          - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags
        :param data_security_mode: :class:`DataSecurityMode` (optional)
-         This describes an enum
+         Data security mode decides what data governance model to use when accessing data from a cluster.
+
+         * `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are
+         not available in this mode. * `SINGLE_USER`: A secure cluster that can only be exclusively used by a
+         single user specified in `single_user_name`. Most programming languages, cluster features and data
+         governance features are available in this mode. * `USER_ISOLATION`: A secure cluster that can be
+         shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data
+         and credentials. Most data governance features are supported in this mode. But programming languages
+         and cluster features might be limited. * `LEGACY_TABLE_ACL`: This mode is for users migrating from
+         legacy Table ACL clusters. * `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy
+         Passthrough on high concurrency clusters. * `LEGACY_SINGLE_USER`: This mode is for users migrating
+         from legacy Passthrough on standard clusters.
       :param docker_image: :class:`DockerImage` (optional)
       :param driver_instance_pool_id: str (optional)
         The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses
diff --git a/docs/workspace/credentials_manager.rst b/docs/workspace/credentials_manager.rst
new file mode 100644
index 000000000..8c83088ce
--- /dev/null
+++ b/docs/workspace/credentials_manager.rst
@@ -0,0 +1,21 @@
+Credentials Manager
+===================
+.. py:class:: CredentialsManagerAPI
+
+    Credentials manager interacts with Identity Providers to perform token exchanges using stored
+    credentials and refresh tokens.
+
+    .. py:method:: exchange_token(partition_id, token_type, scopes)
+
+        Exchange token.
+
+        Exchange tokens with an Identity Provider to get a new access token. It allows specifying scopes to
+        determine token permissions.
+
+        :param partition_id: :class:`PartitionId`
+        :param token_type: List[:class:`TokenType`]
+        :param scopes: List[str]
+          Array of scopes for the token request.
+
+        :returns: :class:`ExchangeTokenResponse`
+
\ No newline at end of file
diff --git a/docs/workspace/ip_access_lists.rst b/docs/workspace/ip_access_lists.rst
index c53cbcb73..b0381c871 100644
--- a/docs/workspace/ip_access_lists.rst
+++ b/docs/workspace/ip_access_lists.rst
@@ -60,7 +60,10 @@ IP Access Lists
        :param label: str
          Label for the IP access list. This **cannot** be empty.
        :param list_type: :class:`ListType`
-         This describes an enum
+         Type of IP access list. Valid values are as follows and are case-sensitive:
+
+         * `ALLOW`: An allow list. Include this IP or range. * `BLOCK`: A block list. Exclude this IP or
+         range. IP addresses in the block list are excluded even if they are included in an allow list.
        :param ip_addresses: List[str]
          Array of IP addresses or CIDR values to be added to the IP access list.
@@ -172,7 +175,10 @@ IP Access Lists
        :param label: str
          Label for the IP access list. This **cannot** be empty.
:param list_type: :class:`ListType` - This describes an enum + Type of IP access list. Valid values are as follows and are case-sensitive: + + * `ALLOW`: An allow list. Include this IP or range. * `BLOCK`: A block list. Exclude this IP or + range. IP addresses in the block list are excluded even if they are included in an allow list. :param ip_addresses: List[str] Array of IP addresses or CIDR values to be added to the IP access list. :param enabled: bool @@ -207,7 +213,10 @@ IP Access Lists :param label: str Label for the IP access list. This **cannot** be empty. :param list_type: :class:`ListType` - This describes an enum + Type of IP access list. Valid values are as follows and are case-sensitive: + + * `ALLOW`: An allow list. Include this IP or range. * `BLOCK`: A block list. Exclude this IP or + range. IP addresses in the block list are excluded even if they are included in an allow list. :param ip_addresses: List[str] Array of IP addresses or CIDR values to be added to the IP access list. :param enabled: bool diff --git a/docs/workspace/jobs.rst b/docs/workspace/jobs.rst index c29c1308b..d64f16d19 100644 --- a/docs/workspace/jobs.rst +++ b/docs/workspace/jobs.rst @@ -18,7 +18,7 @@ Jobs [Secrets CLI]: https://docs.databricks.com/dev-tools/cli/secrets-cli.html [Secrets utility]: https://docs.databricks.com/dev-tools/databricks-utils.html#dbutils-secrets - .. py:method:: cancel_all_runs(job_id) + .. py:method:: cancel_all_runs( [, all_queued_runs, job_id]) Usage: @@ -56,8 +56,11 @@ Jobs Cancels all active runs of a job. The runs are canceled asynchronously, so it doesn't prevent new runs from being started. - :param job_id: int - The canonical identifier of the job to cancel all runs of. This field is required. + :param all_queued_runs: bool (optional) + Optional boolean parameter to cancel all queued runs. If no job_id is provided, all queued runs in + the workspace are canceled. + :param job_id: int (optional) + The canonical identifier of the job to cancel all runs of. @@ -110,7 +113,7 @@ Jobs See :method:wait_get_run_job_terminated_or_skipped for more details. - .. py:method:: create( [, access_control_list, compute, continuous, email_notifications, format, git_source, health, job_clusters, max_concurrent_runs, name, notification_settings, parameters, run_as, schedule, tags, tasks, timeout_seconds, trigger, webhook_notifications]) + .. py:method:: create( [, access_control_list, compute, continuous, email_notifications, format, git_source, health, job_clusters, max_concurrent_runs, name, notification_settings, parameters, queue, run_as, schedule, tags, tasks, timeout_seconds, trigger, webhook_notifications]) Usage: @@ -193,6 +196,8 @@ Jobs `email_notifications` and `webhook_notifications` for this job. :param parameters: List[:class:`JobParameterDefinition`] (optional) Job-level parameter definitions + :param queue: :class:`QueueSettings` (optional) + The queue settings of the job. :param run_as: :class:`JobRunAs` (optional) Write-only setting, available only in Create/Update/Reset and Submit calls. Specifies the user or service principal that the job runs as. If not specified, the job runs as the user who created the @@ -491,8 +496,8 @@ Jobs :param active_only: bool (optional) If active_only is `true`, only active runs are included in the results; otherwise, lists both active - and completed runs. An active run is a run in the `PENDING`, `RUNNING`, or `TERMINATING`. This field - cannot be `true` when completed_only is `true`. + and completed runs. 
An active run is a run in the `QUEUED`, `PENDING`, `RUNNING`, or `TERMINATING`. + This field cannot be `true` when completed_only is `true`. :param completed_only: bool (optional) If completed_only is `true`, only completed runs are included in the results; otherwise, lists both active and completed runs. This field cannot be `true` when active_only is `true`. @@ -571,10 +576,10 @@ Jobs An array of commands to execute for jobs with the dbt task, for example `"dbt_commands": ["dbt deps", "dbt seed", "dbt run"]` :param jar_params: List[str] (optional) - A list of parameters for jobs with Spark JAR tasks, for example `"jar_params": ["john doe", - "35"]`. The parameters are used to invoke the main function of the main class specified in the - Spark JAR task. If not specified upon `run-now`, it defaults to an empty list. jar_params cannot be - specified in conjunction with notebook_params. The JSON representation of this field (for example + A list of parameters for jobs with Spark JAR tasks, for example `"jar_params": ["john doe", "35"]`. + The parameters are used to invoke the main function of the main class specified in the Spark JAR + task. If not specified upon `run-now`, it defaults to an empty list. jar_params cannot be specified + in conjunction with notebook_params. The JSON representation of this field (for example `{"jar_params":["john doe","35"]}`) cannot exceed 10,000 bytes. Use [Task parameter variables](/jobs.html"#parameter-variables") to set parameters containing @@ -603,10 +608,10 @@ Jobs A map from keys to values for jobs with Python wheel task, for example `"python_named_params": {"name": "task", "data": "dbfs:/path/to/data.json"}`. :param python_params: List[str] (optional) - A list of parameters for jobs with Python tasks, for example `"python_params": ["john doe", - "35"]`. The parameters are passed to Python file as command-line parameters. If specified upon - `run-now`, it would overwrite the parameters specified in job setting. The JSON representation of - this field (for example `{"python_params":["john doe","35"]}`) cannot exceed 10,000 bytes. + A list of parameters for jobs with Python tasks, for example `"python_params": ["john doe", "35"]`. + The parameters are passed to Python file as command-line parameters. If specified upon `run-now`, it + would overwrite the parameters specified in job setting. The JSON representation of this field (for + example `{"python_params":["john doe","35"]}`) cannot exceed 10,000 bytes. Use [Task parameter variables] to set parameters containing information about job runs. @@ -626,10 +631,10 @@ Jobs The task keys of the task runs to repair. :param spark_submit_params: List[str] (optional) A list of parameters for jobs with spark submit task, for example `"spark_submit_params": - ["--class", "org.apache.spark.examples.SparkPi"]`. The parameters are passed to spark-submit - script as command-line parameters. If specified upon `run-now`, it would overwrite the parameters - specified in job setting. The JSON representation of this field (for example - `{"python_params":["john doe","35"]}`) cannot exceed 10,000 bytes. + ["--class", "org.apache.spark.examples.SparkPi"]`. The parameters are passed to spark-submit script + as command-line parameters. If specified upon `run-now`, it would overwrite the parameters specified + in job setting. The JSON representation of this field (for example `{"python_params":["john + doe","35"]}`) cannot exceed 10,000 bytes. 
Use [Task parameter variables] to set parameters containing information about job runs @@ -702,7 +707,7 @@ Jobs - .. py:method:: run_now(job_id [, dbt_commands, idempotency_token, jar_params, job_parameters, notebook_params, pipeline_params, python_named_params, python_params, spark_submit_params, sql_params]) + .. py:method:: run_now(job_id [, dbt_commands, idempotency_token, jar_params, job_parameters, notebook_params, pipeline_params, python_named_params, python_params, queue, spark_submit_params, sql_params]) Usage: @@ -758,10 +763,10 @@ Jobs [How to ensure idempotency for jobs]: https://kb.databricks.com/jobs/jobs-idempotency.html :param jar_params: List[str] (optional) - A list of parameters for jobs with Spark JAR tasks, for example `"jar_params": ["john doe", - "35"]`. The parameters are used to invoke the main function of the main class specified in the - Spark JAR task. If not specified upon `run-now`, it defaults to an empty list. jar_params cannot be - specified in conjunction with notebook_params. The JSON representation of this field (for example + A list of parameters for jobs with Spark JAR tasks, for example `"jar_params": ["john doe", "35"]`. + The parameters are used to invoke the main function of the main class specified in the Spark JAR + task. If not specified upon `run-now`, it defaults to an empty list. jar_params cannot be specified + in conjunction with notebook_params. The JSON representation of this field (for example `{"jar_params":["john doe","35"]}`) cannot exceed 10,000 bytes. Use [Task parameter variables](/jobs.html"#parameter-variables") to set parameters containing @@ -789,10 +794,10 @@ Jobs A map from keys to values for jobs with Python wheel task, for example `"python_named_params": {"name": "task", "data": "dbfs:/path/to/data.json"}`. :param python_params: List[str] (optional) - A list of parameters for jobs with Python tasks, for example `"python_params": ["john doe", - "35"]`. The parameters are passed to Python file as command-line parameters. If specified upon - `run-now`, it would overwrite the parameters specified in job setting. The JSON representation of - this field (for example `{"python_params":["john doe","35"]}`) cannot exceed 10,000 bytes. + A list of parameters for jobs with Python tasks, for example `"python_params": ["john doe", "35"]`. + The parameters are passed to Python file as command-line parameters. If specified upon `run-now`, it + would overwrite the parameters specified in job setting. The JSON representation of this field (for + example `{"python_params":["john doe","35"]}`) cannot exceed 10,000 bytes. Use [Task parameter variables] to set parameters containing information about job runs. @@ -803,12 +808,14 @@ Jobs emojis. [Task parameter variables]: https://docs.databricks.com/jobs.html#parameter-variables + :param queue: :class:`QueueSettings` (optional) + The queue settings of the run. :param spark_submit_params: List[str] (optional) A list of parameters for jobs with spark submit task, for example `"spark_submit_params": - ["--class", "org.apache.spark.examples.SparkPi"]`. The parameters are passed to spark-submit - script as command-line parameters. If specified upon `run-now`, it would overwrite the parameters - specified in job setting. The JSON representation of this field (for example - `{"python_params":["john doe","35"]}`) cannot exceed 10,000 bytes. + ["--class", "org.apache.spark.examples.SparkPi"]`. The parameters are passed to spark-submit script + as command-line parameters. 
If specified upon `run-now`, it would overwrite the parameters specified + in job setting. The JSON representation of this field (for example `{"python_params":["john + doe","35"]}`) cannot exceed 10,000 bytes. Use [Task parameter variables] to set parameters containing information about job runs @@ -841,7 +848,7 @@ Jobs :returns: :class:`JobPermissions` - .. py:method:: submit( [, access_control_list, email_notifications, git_source, health, idempotency_token, notification_settings, run_name, tasks, timeout_seconds, webhook_notifications]) + .. py:method:: submit( [, access_control_list, email_notifications, git_source, health, idempotency_token, notification_settings, queue, run_name, tasks, timeout_seconds, webhook_notifications]) Usage: @@ -908,6 +915,8 @@ Jobs :param notification_settings: :class:`JobNotificationSettings` (optional) Optional notification settings that are used when sending notifications to each of the `webhook_notifications` for this run. + :param queue: :class:`QueueSettings` (optional) + The queue settings of the one-time run. :param run_name: str (optional) An optional name for the run. The default value is `Untitled`. :param tasks: List[:class:`SubmitTask`] (optional) diff --git a/docs/workspace/model_registry.rst b/docs/workspace/model_registry.rst index ebea5bd59..bf8663466 100644 --- a/docs/workspace/model_registry.rst +++ b/docs/workspace/model_registry.rst @@ -227,7 +227,13 @@ Model Registry :param model_name: str (optional) Name of the model whose events would trigger this webhook. :param status: :class:`RegistryWebhookStatus` (optional) - This describes an enum + Enable or disable triggering the webhook, or put the webhook into test mode. The default is + `ACTIVE`: * `ACTIVE`: Webhook is triggered when an associated event happens. + + * `DISABLED`: Webhook is not triggered. + + * `TEST_MODE`: Webhook can be triggered through the test endpoint, but is not triggered on a real + event. :returns: :class:`CreateWebhookResponse` @@ -877,7 +883,13 @@ Model Registry :param http_url_spec: :class:`HttpUrlSpec` (optional) :param job_spec: :class:`JobSpec` (optional) :param status: :class:`RegistryWebhookStatus` (optional) - This describes an enum + Enable or disable triggering the webhook, or put the webhook into test mode. The default is + `ACTIVE`: * `ACTIVE`: Webhook is triggered when an associated event happens. + + * `DISABLED`: Webhook is not triggered. + + * `TEST_MODE`: Webhook can be triggered through the test endpoint, but is not triggered on a real + event. \ No newline at end of file diff --git a/docs/workspace/serving_endpoints.rst b/docs/workspace/serving_endpoints.rst index f628967d2..8eab39336 100644 --- a/docs/workspace/serving_endpoints.rst +++ b/docs/workspace/serving_endpoints.rst @@ -27,7 +27,7 @@ Serving endpoints :returns: :class:`BuildLogsResponse` - .. py:method:: create(name, config) + .. py:method:: create(name, config [, tags]) Create a new serving endpoint. @@ -36,6 +36,8 @@ Serving endpoints workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. :param config: :class:`EndpointCoreConfigInput` The core config of the serving endpoint. + :param tags: List[:class:`EndpointTag`] (optional) + Tags to be attached to the serving endpoint and automatically propagated to billing logs. :returns: Long-running operation waiter for :class:`ServingEndpointDetailed`. @@ -123,6 +125,22 @@ Serving endpoints :returns: :class:`ServerLogsResponse` + .. 
py:method:: patch(name [, add_tags, delete_tags])
+
+     Patch the tags of a serving endpoint.
+
+     Used to batch add and delete tags from a serving endpoint with a single API call.
+
+     :param name: str
+       The name of the serving endpoint whose tags to patch. This field is required.
+     :param add_tags: List[:class:`EndpointTag`] (optional)
+       List of endpoint tags to add
+     :param delete_tags: List[str] (optional)
+       List of tag keys to delete
+
+     :returns: Iterator over :class:`EndpointTag`
+
+
   .. py:method:: query(name)

      Query a serving endpoint with provided model input.
diff --git a/docs/workspace/statement_execution.rst b/docs/workspace/statement_execution.rst
index fcad43273..19b63200b 100644
--- a/docs/workspace/statement_execution.rst
+++ b/docs/workspace/statement_execution.rst
@@ -2,161 +2,88 @@ Statement Execution
 ===================
 .. py:class:: StatementExecutionAPI

-    The SQL Statement Execution API manages the execution of arbitrary SQL statements and the fetching of
-    result data.
-
-    **Release status**
-
-    This feature is in [Public Preview].
+    The Databricks SQL Statement Execution API can be used to execute SQL statements on a SQL warehouse and
+    fetch the result.

     **Getting started**

-    We suggest beginning with the [SQL Statement Execution API tutorial].
+    We suggest beginning with the [Databricks SQL Statement Execution API tutorial].

     **Overview of statement execution and result fetching**

     Statement execution begins by issuing a :method:statementexecution/executeStatement request with a valid
     SQL statement and warehouse ID, along with optional parameters such as the data catalog and output
     format.
-
-    When submitting the statement, the call can behave synchronously or asynchronously, based on the
-    `wait_timeout` setting. When set between 5-50 seconds (default: 10) the call behaves synchronously and
-    waits for results up to the specified timeout; when set to `0s`, the call is asynchronous and responds
-    immediately with a statement ID that can be used to poll for status or fetch the results in a separate
-    call.
-
-    **Call mode: synchronous**
-
-    In synchronous mode, when statement execution completes within the `wait timeout`, the result data is
-    returned directly in the response. This response will contain `statement_id`, `status`, `manifest`, and
-    `result` fields. The `status` field confirms success whereas the `manifest` field contains the result data
-    column schema and metadata about the result set. The `result` field contains the first chunk of result
-    data according to the specified `disposition`, and links to fetch any remaining chunks.
-
-    If the execution does not complete before `wait_timeout`, the setting `on_wait_timeout` determines how the
-    system responds.
-
-    By default, `on_wait_timeout=CONTINUE`, and after reaching `wait_timeout`, a response is returned and
-    statement execution continues asynchronously. The response will contain only `statement_id` and `status`
-    fields, and the caller must now follow the flow described for asynchronous call mode to poll and fetch the
-    result.
-
-    Alternatively, `on_wait_timeout` can also be set to `CANCEL`; in this case if the timeout is reached
-    before execution completes, the underlying statement execution is canceled, and a `CANCELED` status is
-    returned in the response.
-
-    **Call mode: asynchronous**
-
-    In asynchronous mode, or after a timed-out synchronous request continues, a `statement_id` and `status`
-    will be returned.
In this case polling :method:statementexecution/getStatement calls are required to fetch - the result and metadata. - - Next, a caller must poll until execution completes (`SUCCEEDED`, `FAILED`, etc.) by issuing - :method:statementexecution/getStatement requests for the given `statement_id`. - - When execution has succeeded, the response will contain `status`, `manifest`, and `result` fields. These - fields and the structure are identical to those in the response to a successful synchronous submission. - The `result` field will contain the first chunk of result data, either `INLINE` or as `EXTERNAL_LINKS` - depending on `disposition`. Additional chunks of result data can be fetched by checking for the presence - of the `next_chunk_internal_link` field, and iteratively `GET` those paths until that field is unset: `GET - https://$DATABRICKS_HOST/{next_chunk_internal_link}`. + If no other parameters are specified, the server will wait for up to 10s before returning a response. If + the statement has completed within this timespan, the response will include the result data as a JSON + array and metadata. Otherwise, if no result is available after the 10s timeout expired, the response will + provide the statement ID that can be used to poll for results by using a + :method:statementexecution/getStatement request. + + You can specify whether the call should behave synchronously, asynchronously or start synchronously with a + fallback to asynchronous execution. This is controlled with the `wait_timeout` and `on_wait_timeout` + settings. If `wait_timeout` is set between 5-50 seconds (default: 10s), the call waits for results up to + the specified timeout; when set to `0s`, the call is asynchronous and responds immediately with a + statement ID. The `on_wait_timeout` setting specifies what should happen when the timeout is reached while + the statement execution has not yet finished. This can be set to either `CONTINUE`, to fallback to + asynchronous mode, or it can be set to `CANCEL`, which cancels the statement. + + In summary: - Synchronous mode - `wait_timeout=30s` and `on_wait_timeout=CANCEL` - The call waits up to 30 + seconds; if the statement execution finishes within this time, the result data is returned directly in the + response. If the execution takes longer than 30 seconds, the execution is canceled and the call returns + with a `CANCELED` state. - Asynchronous mode - `wait_timeout=0s` (`on_wait_timeout` is ignored) - The call + doesn't wait for the statement to finish but returns directly with a statement ID. The status of the + statement execution can be polled by issuing :method:statementexecution/getStatement with the statement + ID. Once the execution has succeeded, this call also returns the result and metadata in the response. - + Hybrid mode (default) - `wait_timeout=10s` and `on_wait_timeout=CONTINUE` - The call waits for up to 10 + seconds; if the statement execution finishes within this time, the result data is returned directly in the + response. If the execution takes longer than 10 seconds, a statement ID is returned. The statement ID can + be used to fetch status and results in the same way as in the asynchronous mode. + + Depending on the size, the result can be split into multiple chunks. If the statement execution is + successful, the statement response contains a manifest and the first chunk of the result. The manifest + contains schema information and provides metadata for each chunk in the result. 
Result chunks can be + retrieved by index with :method:statementexecution/getStatementResultChunkN which may be called in any + order and in parallel. For sequential fetching, each chunk, apart from the last, also contains a + `next_chunk_index` and `next_chunk_internal_link` that point to the next chunk. + + A statement can be canceled with :method:statementexecution/cancelExecution. **Fetching result data: format and disposition** - To specify the result data format, set the `format` field to `JSON_ARRAY` (JSON), `ARROW_STREAM` ([Apache - Arrow Columnar]), or `CSV`. - - You can also configure how to fetch the result data in two different modes by setting the `disposition` - field to `INLINE` or `EXTERNAL_LINKS`. - - The `INLINE` disposition can only be used with the `JSON_ARRAY` format and allows results up to 16 MiB. - When a statement executed with `INLINE` disposition exceeds this limit, the execution is aborted, and no - result can be fetched. - - The `EXTERNAL_LINKS` disposition allows fetching large result sets in `JSON_ARRAY`, `ARROW_STREAM` and - `CSV` formats, and with higher throughput. - - The API uses defaults of `format=JSON_ARRAY` and `disposition=INLINE`. Databricks recommends that you - explicit setting the format and the disposition for all production use cases. - - **Statement response: statement_id, status, manifest, and result** - - The base call :method:statementexecution/getStatement returns a single response combining `statement_id`, - `status`, a result `manifest`, and a `result` data chunk or link, depending on the `disposition`. The - `manifest` contains the result schema definition and the result summary metadata. When using - `disposition=EXTERNAL_LINKS`, it also contains a full listing of all chunks and their summary metadata. - - **Use case: small result sets with INLINE + JSON_ARRAY** - - For flows that generate small and predictable result sets (<= 16 MiB), `INLINE` downloads of `JSON_ARRAY` - result data are typically the simplest way to execute and fetch result data. - - When the result set with `disposition=INLINE` is larger, the result can be transferred in chunks. After - receiving the initial chunk with :method:statementexecution/executeStatement or - :method:statementexecution/getStatement subsequent calls are required to iteratively fetch each chunk. - Each result response contains a link to the next chunk, when there are additional chunks to fetch; it can - be found in the field `.next_chunk_internal_link`. This link is an absolute `path` to be joined with your - `$DATABRICKS_HOST`, and of the form `/api/2.0/sql/statements/{statement_id}/result/chunks/{chunk_index}`. - The next chunk can be fetched by issuing a :method:statementexecution/getStatementResultChunkN request. - - When using this mode, each chunk may be fetched once, and in order. A chunk without a field - `next_chunk_internal_link` indicates the last chunk was reached and all chunks have been fetched from the - result set. + To specify the format of the result data, use the `format` field, which can be set to one of the following + options: `JSON_ARRAY` (JSON), `ARROW_STREAM` ([Apache Arrow Columnar]), or `CSV`. - **Use case: large result sets with EXTERNAL_LINKS + ARROW_STREAM** + There are two ways to receive statement results, controlled by the `disposition` setting, which can be + either `INLINE` or `EXTERNAL_LINKS`: - Using `EXTERNAL_LINKS` to fetch result data in Arrow format allows you to fetch large result sets - efficiently. 
The primary difference from using `INLINE` disposition is that fetched result chunks contain - resolved `external_links` URLs, which can be fetched with standard HTTP. + - `INLINE`: In this mode, the result data is directly included in the response. It's best suited for + smaller results. This mode can only be used with the `JSON_ARRAY` format. - **Presigned URLs** + - `EXTERNAL_LINKS`: In this mode, the response provides links that can be used to download the result data + in chunks separately. This approach is ideal for larger results and offers higher throughput. This mode + can be used with all the formats: `JSON_ARRAY`, `ARROW_STREAM`, and `CSV`. - External links point to data stored within your workspace's internal DBFS, in the form of a presigned URL. - The URLs are valid for only a short period, <= 15 minutes. Alongside each `external_link` is an expiration - field indicating the time at which the URL is no longer valid. In `EXTERNAL_LINKS` mode, chunks can be - resolved and fetched multiple times and in parallel. - - ---- - - ### **Warning: We recommend you protect the URLs in the EXTERNAL_LINKS.** - - When using the EXTERNAL_LINKS disposition, a short-lived pre-signed URL is generated, which the client can - use to download the result chunk directly from cloud storage. As the short-lived credential is embedded in - a pre-signed URL, this URL should be protected. - - Since pre-signed URLs are generated with embedded temporary credentials, you need to remove the - authorization header from the fetch requests. - - ---- - - Similar to `INLINE` mode, callers can iterate through the result set, by using the - `next_chunk_internal_link` field. Each internal link response will contain an external link to the raw - chunk data, and additionally contain the `next_chunk_internal_link` if there are more chunks. - - Unlike `INLINE` mode, when using `EXTERNAL_LINKS`, chunks may be fetched out of order, and in parallel to - achieve higher throughput. + By default, the API uses `format=JSON_ARRAY` and `disposition=INLINE`. **Limits and limitations** - Note: All byte limits are calculated based on internal storage metrics and will not match byte counts of - actual payloads. - - - Statements with `disposition=INLINE` are limited to 16 MiB and will abort when this limit is exceeded. - - Statements with `disposition=EXTERNAL_LINKS` are limited to 100 GiB. - The maximum query text size is 16 - MiB. - Cancelation may silently fail. A successful response from a cancel request indicates that the - cancel request was successfully received and sent to the processing engine. However, for example, an - outstanding statement may complete execution during signal delivery, with the cancel signal arriving too - late to be meaningful. Polling for status until a terminal state is reached is a reliable way to determine - the final state. - Wait timeouts are approximate, occur server-side, and cannot account for caller delays, - network latency from caller to service, and similarly. - After a statement has been submitted and a - statement_id is returned, that statement's status and result will automatically close after either of 2 - conditions: - The last result chunk is fetched (or resolved to an external link). - One hour passes with - no calls to get the status or fetch the result. Best practice: in asynchronous clients, poll for status - regularly (and with backoff) to keep the statement open and alive. - After fetching the last result chunk - (including chunk_index=0) the statement is automatically closed. 
+ Note: The byte limit for INLINE disposition is based on internal storage metrics and will not exactly
+ match the byte count of the actual payload.
+
+ - Statements with `disposition=INLINE` are limited to 25 MiB and will fail when this limit is exceeded. -
+ Statements with `disposition=EXTERNAL_LINKS` are limited to 100 GiB. Result sets larger than this limit
+ will be truncated. Truncation is indicated by the `truncated` field in the result manifest. - The maximum
+ query text size is 16 MiB. - Cancelation might silently fail. A successful response from a cancel request
+ indicates that the cancel request was successfully received and sent to the processing engine. However, an
+ outstanding statement might have already completed execution when the cancel request arrives. Polling for
+ status until a terminal state is reached is a reliable way to determine the final state. - Wait timeouts
+ are approximate, occur server-side, and cannot account for things such as caller delays and network
+ latency from caller to service. - The system will auto-close a statement after one hour if the client
+ stops polling and thus you must poll at least once an hour. - The results are only available for one hour
+ after success; polling does not extend this.

     [Apache Arrow Columnar]: https://arrow.apache.org/overview/
-    [Public Preview]: https://docs.databricks.com/release-notes/release-types.html
-    [SQL Statement Execution API tutorial]: https://docs.databricks.com/sql/api/sql-execution-tutorial.html
+    [Databricks SQL Statement Execution API tutorial]: https://docs.databricks.com/sql/api/sql-execution-tutorial.html

     .. py:method:: cancel_execution(statement_id)
@@ -170,15 +97,21 @@ Statement Execution

-    .. py:method:: execute_statement( [, byte_limit, catalog, disposition, format, on_wait_timeout, parameters, row_limit, schema, statement, wait_timeout, warehouse_id])
+    .. py:method:: execute_statement(statement, warehouse_id [, byte_limit, catalog, disposition, format, on_wait_timeout, parameters, row_limit, schema, wait_timeout])

         Execute a SQL statement.

-        Execute a SQL statement, and if flagged as such, await its result for a specified time.
-
+        :param statement: str
+          The SQL statement to execute. The statement can optionally be parameterized, see `parameters`.
+        :param warehouse_id: str
+          Warehouse upon which to execute a statement. See also [What are SQL
+          warehouses?](/sql/admin/warehouse-type.html)
         :param byte_limit: int (optional)
-          Applies the given byte limit to the statement's result size. Byte counts are based on internal
-          representations and may not match measurable sizes in the requested `format`.
+          Applies the given byte limit to the statement's result size. Byte counts are based on internal data
+          representations and might not match the final size in the requested `format`. If the result was
+          truncated due to the byte limit, then `truncated` in the response is set to `true`. When using
+          `EXTERNAL_LINKS` disposition, a default `byte_limit` of 100 GiB is applied if `byte_limit` is not
+          explicitly set.
         :param catalog: str (optional)
           Sets default catalog for statement execution, similar to [`USE CATALOG`] in SQL.
@@ -187,11 +120,11 @@ Statement Execution
           The fetch disposition provides two modes of fetching results: `INLINE` and `EXTERNAL_LINKS`.

           Statements executed with `INLINE` disposition will return result data inline, in `JSON_ARRAY`
-          format, in a series of chunks.
If a given statement produces a result set with a size larger than 25 MiB, that statement execution is aborted, and no result set will be available. **NOTE** Byte limits are computed based upon internal representations of the result set data, and - may not match the sizes visible in JSON responses. + might not match the sizes visible in JSON responses. Statements executed with `EXTERNAL_LINKS` disposition will return result data as external links: URLs that point to cloud storage internal to the workspace. Using `EXTERNAL_LINKS` disposition @@ -208,6 +141,9 @@ Statement Execution Statement execution supports three result formats: `JSON_ARRAY` (default), `ARROW_STREAM`, and `CSV`. + Important: The formats `ARROW_STREAM` and `CSV` are supported only with `EXTERNAL_LINKS` + disposition. `JSON_ARRAY` is supported in `INLINE` and `EXTERNAL_LINKS` disposition. + When specifying `format=JSON_ARRAY`, result data will be formatted as an array of arrays of values, where each value is either the *string representation* of a value, or `null`. For example, the output of `SELECT concat('id-', id) AS strCol, id AS intCol, null AS nullCol FROM range(3)` would @@ -215,50 +151,41 @@ Statement Execution ``` [ [ "id-1", "1", null ], [ "id-2", "2", null ], [ "id-3", "3", null ], ] ``` - `JSON_ARRAY` is supported with `INLINE` and `EXTERNAL_LINKS` dispositions. - - `INLINE` `JSON_ARRAY` data can be found at the path `StatementResponse.result.data_array`. - - For `EXTERNAL_LINKS` `JSON_ARRAY` results, each URL points to a file in cloud storage that contains - compact JSON with no indentation or extra whitespace. + When specifying `format=JSON_ARRAY` and `disposition=EXTERNAL_LINKS`, each chunk in the result + contains compact JSON with no indentation or extra whitespace. - When specifying `format=ARROW_STREAM`, each chunk in the result will be formatted as Apache Arrow - Stream. See the [Apache Arrow streaming format]. + When specifying `format=ARROW_STREAM` and `disposition=EXTERNAL_LINKS`, each chunk in the result + will be formatted as Apache Arrow Stream. See the [Apache Arrow streaming format]. - IMPORTANT: The format `ARROW_STREAM` is supported only with `EXTERNAL_LINKS` disposition. - - When specifying `format=CSV`, each chunk in the result will be a CSV according to [RFC 4180] - standard. All the columns values will have *string representation* similar to the `JSON_ARRAY` - format, and `null` values will be encoded as “null”. Only the first chunk in the result would - contain a header row with column names. For example, the output of `SELECT concat('id-', id) AS - strCol, id AS intCol, null as nullCol FROM range(3)` would look like this: + When specifying `format=CSV` and `disposition=EXTERNAL_LINKS`, each chunk in the result will be a + CSV according to [RFC 4180] standard. All the columns values will have *string representation* + similar to the `JSON_ARRAY` format, and `null` values will be encoded as “null”. Only the first + chunk in the result would contain a header row with column names. For example, the output of `SELECT + concat('id-', id) AS strCol, id AS intCol, null as nullCol FROM range(3)` would look like this: ``` strCol,intCol,nullCol id-1,1,null id-2,2,null id-3,3,null ``` - IMPORTANT: The format `CSV` is supported only with `EXTERNAL_LINKS` disposition. 
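As a hedged sketch of the `EXTERNAL_LINKS` disposition described in this parameter, the example below requests Arrow-formatted results and downloads each presigned link with plain HTTP. It assumes the `Disposition` and `Format` enums live alongside the other request classes in `databricks.sdk.service.sql`, and it uses a placeholder warehouse ID; the presigned URLs should be fetched without attaching the Databricks authorization header.

```python
import urllib.request

from databricks.sdk import WorkspaceClient
from databricks.sdk.service.sql import Disposition, Format

w = WorkspaceClient()

# EXTERNAL_LINKS returns presigned URLs per chunk instead of inline rows.
resp = w.statement_execution.execute_statement(
    statement="SELECT * FROM range(1000000)",
    warehouse_id="<warehouse-id>",  # hypothetical warehouse ID
    disposition=Disposition.EXTERNAL_LINKS,
    format=Format.ARROW_STREAM)

if resp.result and resp.result.external_links:
    for link in resp.result.external_links:
        # Plain HTTP GET; do not attach workspace credentials to presigned URLs.
        payload = urllib.request.urlopen(link.external_link).read()
        print(f"chunk {link.chunk_index}: {len(payload)} bytes of Arrow stream")
```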
- [Apache Arrow streaming format]: https://arrow.apache.org/docs/format/Columnar.html#ipc-streaming-format [RFC 4180]: https://www.rfc-editor.org/rfc/rfc4180 - :param on_wait_timeout: :class:`TimeoutAction` (optional) - When in synchronous mode with `wait_timeout > 0s` it determines the action taken when the timeout is - reached: - - `CONTINUE` → the statement execution continues asynchronously and the call returns a statement ID - immediately. - - `CANCEL` → the statement execution is canceled and the call returns immediately with a `CANCELED` - state. + :param on_wait_timeout: :class:`ExecuteStatementRequestOnWaitTimeout` (optional) + When `wait_timeout > 0s`, the call will block up to the specified time. If the statement execution + doesn't finish within this time, `on_wait_timeout` determines whether the execution should continue + or be canceled. When set to `CONTINUE`, the statement execution continues asynchronously and the + call returns a statement ID which can be used for polling with + :method:statementexecution/getStatement. When set to `CANCEL`, the statement execution is canceled + and the call returns with a `CANCELED` state. :param parameters: List[:class:`StatementParameterListItem`] (optional) A list of parameters to pass into a SQL statement containing parameter markers. A parameter consists of a name, a value, and optionally a type. To represent a NULL value, the `value` field may be - omitted. If the `type` field is omitted, the value is interpreted as a string. + omitted or set to `null` explicitly. If the `type` field is omitted, the value is interpreted as a + string. If the type is given, parameters will be checked for type correctness according to the given type. A value is correct if the provided string can be converted to the requested type using the `cast` function. The exact semantics are described in the section [`cast` function] of the SQL language reference. - For example, the following statement contains two parameters, `my_id` and `my_date`: + For example, the following statement contains two parameters, `my_name` and `my_date`: SELECT * FROM my_table WHERE name = :my_name AND date = :my_date @@ -268,29 +195,34 @@ Statement Execution "parameters": [ { "name": "my_name", "value": "the name" }, { "name": "my_date", "value": "2020-01-01", "type": "DATE" } ] } - Currently, positional parameters denoted by a `?` marker are not supported by the SQL Statement - Execution API. + Currently, positional parameters denoted by a `?` marker are not supported by the Databricks SQL + Statement Execution API. Also see the section [Parameter markers] of the SQL language reference. [Parameter markers]: https://docs.databricks.com/sql/language-manual/sql-ref-parameter-marker.html [`cast` function]: https://docs.databricks.com/sql/language-manual/functions/cast.html :param row_limit: int (optional) - Applies the given row limit to the statement's result set with identical semantics as the SQL - `LIMIT` clause. + Applies the given row limit to the statement's result set, but unlike the `LIMIT` clause in SQL, it + also sets the `truncated` field in the response to indicate whether the result was trimmed due to + the limit or not. :param schema: str (optional) Sets default schema for statement execution, similar to [`USE SCHEMA`] in SQL. 
[`USE SCHEMA`]: https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-use-schema.html - :param statement: str (optional) - SQL statement to execute :param wait_timeout: str (optional) - The time in seconds the API service will wait for the statement's result set as `Ns`, where `N` can - be set to 0 or to a value between 5 and 50. When set to '0s' the statement will execute in - asynchronous mode. - :param warehouse_id: str (optional) - Warehouse upon which to execute a statement. See also [What are SQL - warehouses?](/sql/admin/warehouse-type.html) + The time in seconds the call will wait for the statement's result set as `Ns`, where `N` can be set + to 0 or to a value between 5 and 50. + + When set to `0s`, the statement will execute in asynchronous mode and the call will not wait for the + execution to finish. In this case, the call returns directly with `PENDING` state and a statement ID + which can be used for polling with :method:statementexecution/getStatement. + + When set between 5 and 50 seconds, the call will behave synchronously up to this timeout and wait + for the statement execution to finish. If the execution finishes within this time, the call returns + immediately with a manifest and result data (or a `FAILED` state in case of an execution error). If + the statement takes longer to execute, `on_wait_timeout` determines what should happen after the + timeout is reached. :returns: :class:`ExecuteStatementResponse` @@ -305,7 +237,7 @@ Statement Execution state set. After at least 12 hours in terminal state, the statement is removed from the warehouse and further calls will receive an HTTP 404 response. - **NOTE** This call currently may take up to 5 seconds to get the latest status and result. + **NOTE** This call currently might take up to 5 seconds to get the latest status and result. :param statement_id: str @@ -316,11 +248,12 @@ Statement Execution Get result chunk by index. - After the statement execution has `SUCCEEDED`, the result data can be fetched by chunks. Whereas the - first chuck with `chunk_index=0` is typically fetched through a `get status` request, subsequent - chunks can be fetched using a `get result` request. The response structure is identical to the nested - `result` element described in the `get status` request, and similarly includes the `next_chunk_index` - and `next_chunk_internal_link` fields for simple iteration through the result set. + After the statement execution has `SUCCEEDED`, this request can be used to fetch any chunk by index. + Whereas the first chunk with `chunk_index=0` is typically fetched with + :method:statementexecution/executeStatement or :method:statementexecution/getStatement, this request + can be used to fetch subsequent chunks. The response structure is identical to the nested `result` + element described in the :method:statementexecution/getStatement request, and similarly includes the + `next_chunk_index` and `next_chunk_internal_link` fields for simple iteration through the result set. :param statement_id: str :param chunk_index: int diff --git a/docs/workspace/workspace-settings.rst b/docs/workspace/workspace-settings.rst index 71e66ac16..21b04c584 100644 --- a/docs/workspace/workspace-settings.rst +++ b/docs/workspace/workspace-settings.rst @@ -7,6 +7,7 @@ Manage security settings for Accounts and Workspaces .. toctree:: :maxdepth: 1 + credentials_manager ip_access_lists token_management tokens
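Finally, a brief, hedged sketch tying together a few of the other surfaces this patch documents: queued job runs, bulk cancellation of queued runs, and serving-endpoint tag patching. The job ID and endpoint name are placeholders, and the `QueueSettings(enabled=...)` and `EndpointTag(key=..., value=...)` shapes are assumptions inferred from the signatures above rather than verbatim reference.

```python
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.jobs import QueueSettings
from databricks.sdk.service.serving import EndpointTag

w = WorkspaceClient()

# Queue the run if the job is already at its concurrency limit
# (assumes QueueSettings exposes a single `enabled` flag).
waiter = w.jobs.run_now(job_id=123, queue=QueueSettings(enabled=True))  # hypothetical job ID
# waiter.result() would block until the run reaches a terminal state.

# Cancel every queued run in the workspace; job_id is now optional.
w.jobs.cancel_all_runs(all_queued_runs=True)

# Batch add and delete serving endpoint tags in a single call
# (assumes EndpointTag carries a key and an optional value).
w.serving_endpoints.patch(name="my-endpoint",
                          add_tags=[EndpointTag(key="team", value="data-platform")],
                          delete_tags=["deprecated"])
```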