From af5654b951b4c824856d219028a40bb971eff6ef Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Fri, 13 Sep 2024 18:26:20 +0200 Subject: [PATCH] chore: bump hdfs version to 3.4.0 --- .../hbase-hdfs-load-cycling-data/distcp-cycling-data.yaml | 3 ++- .../load-test-data.yaml | 4 +++- .../load-test-data.yaml | 1 + demos/trino-taxi-data/load-test-data.yaml | 1 + stacks/dual-hive-hdfs-s3/hdfs.yaml | 2 +- stacks/end-to-end-security/hdfs.yaml | 8 +++++--- stacks/hdfs-hbase/hdfs.yaml | 3 ++- stacks/jupyterhub-pyspark-hdfs/hdfs.yaml | 2 +- stacks/keycloak-opa-poc/hdfs.yaml | 2 +- 9 files changed, 17 insertions(+), 9 deletions(-) diff --git a/demos/hbase-hdfs-load-cycling-data/distcp-cycling-data.yaml b/demos/hbase-hdfs-load-cycling-data/distcp-cycling-data.yaml index d487b8a6..44300127 100644 --- a/demos/hbase-hdfs-load-cycling-data/distcp-cycling-data.yaml +++ b/demos/hbase-hdfs-load-cycling-data/distcp-cycling-data.yaml @@ -11,7 +11,7 @@ spec: # We use 24.3.0 here which contains the distcp MapReduce components # This is not included in the 24.7 images and will fail. # See: https://github.com/stackabletech/docker-images/issues/793 - image: docker.stackable.tech/stackable/hadoop:3.3.4-stackable24.3.0 + image: docker.stackable.tech/stackable/hadoop:3.4.0-stackable24.11.0 env: - name: HADOOP_USER_NAME value: stackable @@ -19,6 +19,7 @@ spec: value: "/stackable/conf/hdfs" - name: HADOOP_CLASSPATH value: "/stackable/hadoop/share/hadoop/tools/lib/*.jar" + # yamllint disable-line rule:line-length command: ["bash", "-c", "bin/hdfs dfs -mkdir -p /data/raw && bin/hadoop distcp -D fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider s3a://public-backup-nyc-tlc/cycling-tripdata/demo-cycling-tripdata.csv.gz hdfs://hdfs/data/raw"] volumeMounts: - name: config-volume-hdfs diff --git a/demos/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data/load-test-data.yaml b/demos/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data/load-test-data.yaml index 7add3113..7dd44f89 100644 --- a/demos/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data/load-test-data.yaml +++ b/demos/jupyterhub-pyspark-hdfs-anomaly-detection-taxi-data/load-test-data.yaml @@ -8,13 +8,15 @@ spec: spec: containers: - name: load-ny-taxi-data - image: docker.stackable.tech/stackable/hadoop:3.3.4-stackable24.7.0 + image: docker.stackable.tech/stackable/hadoop:3.4.0-stackable24.11.0 + # yamllint disable rule:line-length command: ["bash", "-c", "/stackable/hadoop/bin/hdfs dfs -mkdir -p /ny-taxi-data/raw \ && cd /tmp \ && for month in 2020-09; do \ curl -O https://repo.stackable.tech/repository/misc/ny-taxi-data/fhvhv_tripdata_$month.parquet \ && /stackable/hadoop/bin/hdfs dfs -put fhvhv_tripdata_$month.parquet /ny-taxi-data/raw/; \ done"] + # yamllint enable rule:line-length volumeMounts: - name: hdfs-discovery-configmap mountPath: /hdfs diff --git a/demos/spark-k8s-anomaly-detection-taxi-data/load-test-data.yaml b/demos/spark-k8s-anomaly-detection-taxi-data/load-test-data.yaml index 395249eb..299d473a 100644 --- a/demos/spark-k8s-anomaly-detection-taxi-data/load-test-data.yaml +++ b/demos/spark-k8s-anomaly-detection-taxi-data/load-test-data.yaml @@ -9,6 +9,7 @@ spec: containers: - name: load-ny-taxi-data image: "bitnami/minio:2022-debian-10" + # yamllint disable-line rule:line-length command: ["bash", "-c", "cd /tmp && for month in 2020-09 2020-10 2020-11 2020-12; do curl -O https://repo.stackable.tech/repository/misc/ny-taxi-data/fhvhv_tripdata_$month.parquet && mc --insecure alias set minio http://minio:9000/ $(cat /minio-s3-credentials/accessKey) $(cat /minio-s3-credentials/secretKey) && mc cp fhvhv_tripdata_$month.parquet minio/demo/ny-taxi-data/raw/ && mc mb --ignore-existing minio/prediction; done"] volumeMounts: - name: minio-s3-credentials diff --git a/demos/trino-taxi-data/load-test-data.yaml b/demos/trino-taxi-data/load-test-data.yaml index d0639ab6..4afe0d58 100644 --- a/demos/trino-taxi-data/load-test-data.yaml +++ b/demos/trino-taxi-data/load-test-data.yaml @@ -9,6 +9,7 @@ spec: containers: - name: load-ny-taxi-data image: "bitnami/minio:2024-debian-12" + # yamllint disable-line rule:line-length command: ["bash", "-c", "cd /tmp && for month in 2020-01 2020-02 2020-03 2020-04 2020-05 2020-06 2020-07 2020-08 2020-09 2020-10 2020-11 2020-12 2021-01 2021-02 2021-03 2021-04 2021-05 2021-06 2021-07 2021-08 2021-09 2021-10 2021-11 2021-12 2022-01 2022-02 2022-03 2022-04; do curl -O https://repo.stackable.tech/repository/misc/ny-taxi-data/yellow_tripdata_$month.parquet && mc --insecure alias set minio http://minio:9000/ $(cat /minio-s3-credentials/accessKey) $(cat /minio-s3-credentials/secretKey) && mc cp yellow_tripdata_$month.parquet minio/demo/ny-taxi-data/raw/; done"] volumeMounts: - name: minio-s3-credentials diff --git a/stacks/dual-hive-hdfs-s3/hdfs.yaml b/stacks/dual-hive-hdfs-s3/hdfs.yaml index 386e8793..5c6b6d83 100644 --- a/stacks/dual-hive-hdfs-s3/hdfs.yaml +++ b/stacks/dual-hive-hdfs-s3/hdfs.yaml @@ -25,7 +25,7 @@ metadata: name: hdfs spec: image: - productVersion: 3.3.4 + productVersion: 3.4.0 clusterConfig: listenerClass: external-unstable dfsReplication: 1 diff --git a/stacks/end-to-end-security/hdfs.yaml b/stacks/end-to-end-security/hdfs.yaml index 0330e001..e976e21e 100644 --- a/stacks/end-to-end-security/hdfs.yaml +++ b/stacks/end-to-end-security/hdfs.yaml @@ -1,10 +1,11 @@ +--- apiVersion: hdfs.stackable.tech/v1alpha1 kind: HdfsCluster metadata: name: hdfs spec: image: - productVersion: 3.3.4 + productVersion: 3.4.0 clusterConfig: zookeeperConfigMapName: hdfs-znode authentication: @@ -29,8 +30,9 @@ spec: level: DEBUG configOverrides: &configOverrides core-site.xml: - # The idea is that the user "hive" can't do anything in hdfs, *but* it can impersonate other users - # (such as trino), that have the needed permissions + # The idea is that the user "hive" can't do anything in hdfs, + # *but* it can impersonate other users (such as trino), + # that have the needed permissions hadoop.proxyuser.hive.users: "*" hadoop.proxyuser.hive.hosts: "*" roleGroups: diff --git a/stacks/hdfs-hbase/hdfs.yaml b/stacks/hdfs-hbase/hdfs.yaml index 07649b3b..c65a2b9a 100644 --- a/stacks/hdfs-hbase/hdfs.yaml +++ b/stacks/hdfs-hbase/hdfs.yaml @@ -1,10 +1,11 @@ +--- apiVersion: hdfs.stackable.tech/v1alpha1 kind: HdfsCluster metadata: name: hdfs spec: image: - productVersion: 3.3.4 + productVersion: 3.4.0 clusterConfig: dfsReplication: 1 zookeeperConfigMapName: hdfs-znode diff --git a/stacks/jupyterhub-pyspark-hdfs/hdfs.yaml b/stacks/jupyterhub-pyspark-hdfs/hdfs.yaml index f7835b70..54b0ad69 100644 --- a/stacks/jupyterhub-pyspark-hdfs/hdfs.yaml +++ b/stacks/jupyterhub-pyspark-hdfs/hdfs.yaml @@ -13,7 +13,7 @@ metadata: name: hdfs spec: image: - productVersion: 3.3.4 + productVersion: 3.4.0 clusterConfig: dfsReplication: 1 zookeeperConfigMapName: hdfs-znode diff --git a/stacks/keycloak-opa-poc/hdfs.yaml b/stacks/keycloak-opa-poc/hdfs.yaml index 05eb35d5..30222c36 100644 --- a/stacks/keycloak-opa-poc/hdfs.yaml +++ b/stacks/keycloak-opa-poc/hdfs.yaml @@ -5,7 +5,7 @@ metadata: name: hdfs spec: image: - productVersion: 3.3.4 + productVersion: 3.4.0 clusterConfig: dfsReplication: 1 zookeeperConfigMapName: hdfs-znode