Skip to content

Latest commit

 

History

History
158 lines (132 loc) · 4.15 KB

README.md

File metadata and controls

158 lines (132 loc) · 4.15 KB

stackql-databricks-openapi

Build the databricks_account and databricks_workspace providers for StackQL from the Databricks web documentation.

usage

The program requires Selenium and the ChromeDriver for Windows. Use PowerShell to run the following commands, which extract the web documentation into machine-readable staging documents; the staging documents are then converted into tagged OpenAPI specification documents organized by service:

python -m venv venv
.\venv\Scripts\Activate
pip install -r requirements.txt
pip freeze
# scrape web docs
python .\process_web_docs.py account --clean --debug 
python .\process_web_docs.py workspace --clean --debug
# generate openapi specs
python .\generate_openapi_specs.py account --clean --debug
python .\generate_openapi_specs.py workspace --clean --debug

deactivate
Remove-Item -Recurse -Force ./venv

tests

To run the tests locally, clone stackql-provider-tests and run the following:

# run from the directory you cloned into
cd /mnt/c/LocalGitRepos/stackql/core/stackql-provider-tests
# test account
bash test-provider.sh \
databricks_account \
false \
/mnt/c/LocalGitRepos/stackql/openapi-conversion/stackql-databricks-openapi/openapi_providers \
true
# test workspace
bash test-provider.sh \
databricks_workspace \
false \
/mnt/c/LocalGitRepos/stackql/openapi-conversion/stackql-databricks-openapi/openapi_providers \
true
# back to starting dir
cd /mnt/c/LocalGitRepos/stackql/openapi-conversion/stackql-databricks-openapi

inspect

curl -L https://bit.ly/stackql-zip -O && unzip stackql-zip
PROVIDER_REGISTRY_ROOT_DIR="$(pwd)/openapi_providers"
REG_STR='{"url": "file://'${PROVIDER_REGISTRY_ROOT_DIR}'", "localDocRoot": "'${PROVIDER_REGISTRY_ROOT_DIR}'", "verifyConfig": {"nopVerify": true}}'
./stackql shell --registry="${REG_STR}"

Some test queries to run in the StackQL shell:

SELECT 
u.id,
displayName, 
userName, 
active 
FROM databricks_account.iam.users u, JSON_EACH(roles)
WHERE account_id = 'ebfcc5a9-9d49-4c93-b651-b3ee6cf1c9ce'
AND JSON_EXTRACT(json_each.value, '$.value') = 'account_admin';
select 
gr.id, 
displayName, 
json_extract(json_each.value, '$.value') as entitlement 
from databricks_workspace.iam.groups gr, JSON_EACH(entitlements) 
where deployment_name = 'dbc-ddbc0f51-c9cf';
SELECT 
sp.id,
active,
applicationId,
displayName,
externalId
FROM databricks_account.iam.service_principals sp, JSON_EACH(roles)
WHERE account_id = 'ebfcc5a9-9d49-4c93-b651-b3ee6cf1c9ce'
AND JSON_EXTRACT(json_each.value, '$.value') = 'account_admin';
select 
workspace_id,
workspace_name,
deployment_name,
workspace_status,
pricing_tier, 
aws_region, 
credentials_id, 
storage_configuration_id
from
databricks_account.provisioning.workspaces where account_id = 'ebfcc5a9-9d49-4c93-b651-b3ee6cf1c9ce';
select 
cluster_id,
aws_attributes,
node_type_id,
state
from  
databricks_workspace.compute.clusters 
where deployment_name = 'dbc-ddbc0f51-c9cf';
select
*
from databricks_account.provisioning.vw_workspaces 
where account_id = 'ebfcc5a9-9d49-4c93-b651-b3ee6cf1c9ce' 

check for new routes

python .\find_new_routes.py workspace
# or
python3 .\find_new_routes.py account

debugging with curl

DATABRICKS_TOKEN=$(curl --request POST "https://accounts.cloud.databricks.com/oidc/accounts/${DATABRICKS_ACCOUNT_ID}/v1/token" \
  --header "Content-Type: application/x-www-form-urlencoded" \
  --data-urlencode "grant_type=client_credentials" \
  --data-urlencode "client_id=${DATABRICKS_CLIENT_ID}" \
  --data-urlencode "client_secret=${DATABRICKS_CLIENT_SECRET}" \
  --data-urlencode "scope=all-apis" | jq -r .access_token)

curl --request GET "https://accounts.cloud.databricks.com/api/2.0/accounts/${DATABRICKS_ACCOUNT_ID}/workspaces" \
  --header "Authorization: Bearer ${DATABRICKS_TOKEN}" \
  -vvv \
  --header "Accept: application/json"

generate user (web) docs

bash generate_user_docs.sh account
bash generate_user_docs.sh workspace