diff --git a/demos/data-lakehouse-iceberg-trino-spark/create-nifi-ingestion-job.yaml b/demos/data-lakehouse-iceberg-trino-spark/create-nifi-ingestion-job.yaml
index 59968ee2..e86947df 100644
--- a/demos/data-lakehouse-iceberg-trino-spark/create-nifi-ingestion-job.yaml
+++ b/demos/data-lakehouse-iceberg-trino-spark/create-nifi-ingestion-job.yaml
@@ -10,7 +10,14 @@ spec:
initContainers:
- name: wait-for-kafka
image: oci.stackable.tech/sdp/tools:1.0.0-stackable0.0.0-dev
- command: ["bash", "-c", "echo 'Waiting for all kafka brokers to be ready' && kubectl wait --for=condition=ready --timeout=30m pod -l app.kubernetes.io/instance=kafka -l app.kubernetes.io/name=kafka"]
+ command:
+ - bash
+ - -euo
+ - pipefail
+ - -c
+ - |
+ echo 'Waiting for all kafka brokers to be ready'
+ kubectl wait --for=condition=ready --timeout=30m pod -l app.kubernetes.io/instance=kafka,app.kubernetes.io/name=kafka
containers:
- name: create-nifi-ingestion-job
image: oci.stackable.tech/sdp/testing-tools:0.2.0-stackable0.0.0-dev
@@ -19,7 +26,8 @@ spec:
- -euo
- pipefail
- -c
- - python -u /tmp/script/script.py
+ - |
+ python -u /tmp/script/script.py
volumeMounts:
- name: script
mountPath: /tmp/script
@@ -53,8 +61,8 @@ data:
import requests
import urllib3
- # As of 2022-08-29 we cant use "https://nifi:8443" here because The request contained an invalid host header [nifi:8443] in the request [/nifi-api]. Check for request manipulation or third-party intercept.
- ENDPOINT = f"https://nifi-node-default-0.nifi-node-default.{os.environ['NAMESPACE']}.svc.cluster.local:8443" # For local testing / developing replace it, afterwards change back to f"https://nifi-node-default-0.nifi-node-default.{os.environ['NAMESPACE']}.svc.cluster.local:8443"
+ # As of 2022-08-29 we cant use "https://nifi-node:8443" here because The request contained an invalid host header [nifi:8443] in the request [/nifi-api]. Check for request manipulation or third-party intercept.
+ ENDPOINT = f"https://nifi-node-default-0.nifi-node-default-headless.{os.environ['NAMESPACE']}.svc.cluster.local:8443" # For local testing / developing replace it, afterwards change back to f"https://nifi-node-default-0.nifi-node-default-headless.{os.environ['NAMESPACE']}.svc.cluster.local:8443"
USERNAME = "admin"
PASSWORD = open("/nifi-admin-credentials-secret/admin").read()
diff --git a/demos/data-lakehouse-iceberg-trino-spark/create-spark-ingestion-job.yaml b/demos/data-lakehouse-iceberg-trino-spark/create-spark-ingestion-job.yaml
index 739ceeed..10271f25 100644
--- a/demos/data-lakehouse-iceberg-trino-spark/create-spark-ingestion-job.yaml
+++ b/demos/data-lakehouse-iceberg-trino-spark/create-spark-ingestion-job.yaml
@@ -13,11 +13,26 @@ spec:
initContainers:
- name: wait-for-kafka
image: oci.stackable.tech/sdp/tools:1.0.0-stackable0.0.0-dev
- command: ["bash", "-c", "echo 'Waiting for all kafka brokers to be ready' && kubectl wait --for=condition=ready --timeout=30m pod -l app.kubernetes.io/name=kafka -l app.kubernetes.io/instance=kafka"]
+ command:
+ - bash
+ - -euo
+ - pipefail
+ - -c
+ - |
+ echo 'Waiting for all minio instances to be ready'
+ kubectl wait --for=condition=ready --timeout=30m pod -l app=minio,release=minio,stackable.tech/vendor=Stackable
+ echo 'Waiting for all kafka brokers to be ready'
+ kubectl wait --for=condition=ready --timeout=30m pod -l app.kubernetes.io/name=kafka,app.kubernetes.io/instance=kafka
containers:
- name: create-spark-ingestion-job
image: oci.stackable.tech/sdp/tools:1.0.0-stackable0.0.0-dev
- command: ["bash", "-c", "echo 'Submitting Spark job' && kubectl apply -f /tmp/manifest/spark-ingestion-job.yaml"]
+ command:
+ - bash
+ - -euo
+ - pipefail
+ - -c
+ - |
+ echo 'Submitting Spark job' && kubectl apply -f /tmp/manifest/spark-ingestion-job.yaml
volumeMounts:
- name: manifest
mountPath: /tmp/manifest
@@ -56,7 +71,7 @@ data:
spark.sql.extensions: org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions
spark.sql.catalog.lakehouse: org.apache.iceberg.spark.SparkCatalog
spark.sql.catalog.lakehouse.type: hive
- spark.sql.catalog.lakehouse.uri: thrift://hive-iceberg:9083
+ spark.sql.catalog.lakehouse.uri: thrift://hive-iceberg-metastore:9083
# Every merge into statements creates 8 files.
# Paralleling is enough for the demo, might need to be increased (or omitted entirely) when merge larger data volumes
spark.sql.shuffle.partitions: "8"
diff --git a/demos/data-lakehouse-iceberg-trino-spark/create-trino-tables.yaml b/demos/data-lakehouse-iceberg-trino-spark/create-trino-tables.yaml
index 5a0f5ae9..a1ce4d7f 100644
--- a/demos/data-lakehouse-iceberg-trino-spark/create-trino-tables.yaml
+++ b/demos/data-lakehouse-iceberg-trino-spark/create-trino-tables.yaml
@@ -10,7 +10,14 @@ spec:
initContainers:
- name: wait-for-testdata
image: oci.stackable.tech/sdp/tools:1.0.0-stackable0.0.0-dev
- command: ["bash", "-c", "echo 'Waiting for job load-test-data to finish' && kubectl wait --for=condition=complete --timeout=30m job/load-test-data"]
+ command:
+ - bash
+ - -euo
+ - pipefail
+ - -c
+ - |
+ echo 'Waiting for job load-test-data to finish'
+ kubectl wait --for=condition=complete --timeout=30m job/load-test-data
containers:
- name: create-tables-in-trino
image: oci.stackable.tech/sdp/testing-tools:0.2.0-stackable0.0.0-dev
diff --git a/demos/data-lakehouse-iceberg-trino-spark/serviceaccount.yaml b/demos/data-lakehouse-iceberg-trino-spark/serviceaccount.yaml
index cb3ea626..53e2ccae 100644
--- a/demos/data-lakehouse-iceberg-trino-spark/serviceaccount.yaml
+++ b/demos/data-lakehouse-iceberg-trino-spark/serviceaccount.yaml
@@ -31,6 +31,14 @@ rules:
- get
- list
- watch
+ - apiGroups:
+ - apps
+ resources:
+ - statefulsets
+ verbs:
+ - get
+ - list
+ - watch
- apiGroups:
- batch
resources:
diff --git a/demos/data-lakehouse-iceberg-trino-spark/setup-superset.yaml b/demos/data-lakehouse-iceberg-trino-spark/setup-superset.yaml
index 187f94ee..4a14c4f5 100644
--- a/demos/data-lakehouse-iceberg-trino-spark/setup-superset.yaml
+++ b/demos/data-lakehouse-iceberg-trino-spark/setup-superset.yaml
@@ -9,7 +9,14 @@ spec:
containers:
- name: setup-superset
image: oci.stackable.tech/sdp/testing-tools:0.2.0-stackable0.0.0-dev
- command: ["bash", "-c", "curl -o superset-assets.zip https://raw.githubusercontent.com/stackabletech/demos/main/demos/data-lakehouse-iceberg-trino-spark/superset-assets.zip && python -u /tmp/script/script.py"]
+ command:
+ - bash
+ - -euo
+ - pipefail
+ - -c
+ - |
+ curl -o superset-assets.zip https://raw.githubusercontent.com/stackabletech/demos/main/demos/data-lakehouse-iceberg-trino-spark/superset-assets.zip
+ python -u /tmp/script/script.py
volumeMounts:
- name: script
mountPath: /tmp/script
@@ -39,7 +46,7 @@ data:
import logging
import requests
- base_url = "http://superset-node-default:8088" # For local testing / developing replace it, afterwards change back to http://superset-node-default:8088
+ base_url = "http://superset-node:8088" # For local testing / developing replace it, afterwards change back to http://superset-node:8088
superset_username = open("/superset-credentials/adminUser.username").read()
superset_password = open("/superset-credentials/adminUser.password").read()
trino_username = "admin"
diff --git a/demos/data-lakehouse-iceberg-trino-spark/superset-assets.zip b/demos/data-lakehouse-iceberg-trino-spark/superset-assets.zip
index 4f36c971..ebe610be 100644
Binary files a/demos/data-lakehouse-iceberg-trino-spark/superset-assets.zip and b/demos/data-lakehouse-iceberg-trino-spark/superset-assets.zip differ
diff --git a/stacks/data-lakehouse-iceberg-trino-spark/nifi.yaml b/stacks/data-lakehouse-iceberg-trino-spark/nifi.yaml
index 58a4c2db..6c067d79 100644
--- a/stacks/data-lakehouse-iceberg-trino-spark/nifi.yaml
+++ b/stacks/data-lakehouse-iceberg-trino-spark/nifi.yaml
@@ -9,18 +9,19 @@ spec:
clusterConfig:
authentication:
- authenticationClass: nifi-admin-credentials
- listenerClass: external-unstable
sensitiveProperties:
keySecret: nifi-sensitive-property-key
autoGenerate: true
nodes:
+ roleConfig:
+ listenerClass: external-unstable
config:
resources:
cpu:
min: "2"
max: "4"
memory:
- limit: '6Gi'
+ limit: "6Gi"
storage:
contentRepo:
capacity: "10Gi"
@@ -51,4 +52,4 @@ kind: Secret
metadata:
name: nifi-admin-credentials-secret
stringData:
- admin: {{ nifiAdminPassword }}
+ admin: "{{ nifiAdminPassword }}"
diff --git a/stacks/data-lakehouse-iceberg-trino-spark/trino.yaml b/stacks/data-lakehouse-iceberg-trino-spark/trino.yaml
index 0716844e..2848e35b 100644
--- a/stacks/data-lakehouse-iceberg-trino-spark/trino.yaml
+++ b/stacks/data-lakehouse-iceberg-trino-spark/trino.yaml
@@ -7,7 +7,6 @@ spec:
image:
productVersion: "476"
clusterConfig:
- listenerClass: external-unstable
catalogLabelSelector:
matchLabels:
trino: trino
@@ -18,6 +17,8 @@ spec:
configMapName: opa
package: trino
coordinators:
+ roleConfig:
+ listenerClass: external-unstable
config:
queryMaxMemory: 10TB
resources:
@@ -25,7 +26,7 @@ spec:
min: "1"
max: "4"
memory:
- limit: '6Gi'
+ limit: "6Gi"
roleGroups:
default:
replicas: 1
@@ -37,7 +38,7 @@ spec:
min: "2"
max: "6"
memory:
- limit: '20Gi'
+ limit: "20Gi"
roleGroups:
default:
replicas: 4