From 03921a6bb6d1fba3af641db6fd2952d0ae4b8ce6 Mon Sep 17 00:00:00 2001 From: Jakub Stejskal Date: Tue, 5 Nov 2024 12:20:26 +0100 Subject: [PATCH] Add test that uses s3 (Minio) as a state backend (#88) * Add test that uses s3 (Minio) as a state backend Signed-off-by: Jakub Stejskal * Adjust config to make it more resilient Signed-off-by: Jakub Stejskal * Some tries to fix TF Signed-off-by: Jakub Stejskal * Debug v1 Signed-off-by: Jakub Stejskal * Use earliest-offset for Flink-kafka client Signed-off-by: Jakub Stejskal * Revert formating Signed-off-by: Jakub Stejskal * Rename SetupMinio to MinioInstaller Signed-off-by: Jakub Stejskal --------- Signed-off-by: Jakub Stejskal --- .../streams/e2e/flink/sql/SqlJobRunnerST.md | 26 ++ .../clients/kafka/StrimziKafkaClients.java | 4 +- .../io/streams/constants/TestConstants.java | 4 + .../operands/minio/MinioInstaller.java | 142 +++++++++ .../templates/KafkaNodePoolTemplate.java | 4 +- .../java/io/streams/sql/TestStatements.java | 6 +- .../java/io/streams/utils/MinioUtils.java | 147 +++++++++ .../streams/e2e/flink/sql/SqlJobRunnerST.java | 280 +++++++++++++++++- 8 files changed, 605 insertions(+), 8 deletions(-) create mode 100644 src/main/java/io/streams/operands/minio/MinioInstaller.java create mode 100644 src/main/java/io/streams/utils/MinioUtils.java diff --git a/docs/io/streams/e2e/flink/sql/SqlJobRunnerST.md b/docs/io/streams/e2e/flink/sql/SqlJobRunnerST.md index cc6e1d1..e01f790 100644 --- a/docs/io/streams/e2e/flink/sql/SqlJobRunnerST.md +++ b/docs/io/streams/e2e/flink/sql/SqlJobRunnerST.md @@ -62,6 +62,32 @@ * `flink` (description file doesn't exist) +## testS3StateBackend + +**Description:** Test verifies that user can use S3 as state backend + +**Steps:** + +| Step | Action | Result | +| - | - | - | +| 1. | Create namespace, serviceaccount and roles for Flink | Resources created | +| 2. | Deploy Apicurio registry | Apicurio registry is up and running | +| 3. | Deploy Kafka my-cluster with scram-sha auth | Kafka is up and running | +| 4. | Create KafkaUser with scram-sha secret | KafkaUser created | +| 5. | Deploy strimzi-kafka-clients producer with payment data generator | Client job is created and data are sent to flink.payment.data topic | +| 6. | Deploy Minio for S3 service | Minio is up and running | +| 7. | Deploy FlinkDeployment with sql which gets data from flink.payment.data topic filter payment of type paypal and send data to flink.payment.paypal topic, for authentication is used secret created by KafkaUser and this secret is passed into by secret interpolation. Flink is configured to use S3 as a state backend | FlinkDeployment is up and tasks are deployed and it sends filtered data into flink.payment.paypal topic, task manager deployed by FlinkDeployment uses S3 | +| 8. | Deploy strimzi-kafka-clients consumer as job and consume messages fromkafka topic flink.payment.paypal | Consumer is deployed and it consumes messages | +| 9. | Verify that messages are present | Messages are present | +| 10. | Verify that taskmanager logs contains 'State backend loader loads the state backend as HashMapStateBackend' | Log message is present | +| 11. | Verify that Minio contains some data from Flink | Flink bucket is not empty | + +**Labels:** + +* `flink-sql-runner` (description file doesn't exist) +* `flink` (description file doesn't exist) + + ## testSimpleFilter **Description:** Test verifies sql-runner.jar works integrated with kafka, apicurio and uses scram-sha for kafka authentication diff --git a/src/main/java/io/streams/clients/kafka/StrimziKafkaClients.java b/src/main/java/io/streams/clients/kafka/StrimziKafkaClients.java index ff910b0..6f0f428 100644 --- a/src/main/java/io/streams/clients/kafka/StrimziKafkaClients.java +++ b/src/main/java/io/streams/clients/kafka/StrimziKafkaClients.java @@ -198,7 +198,7 @@ public JobBuilder defaultProducerStrimzi() { .endEnv() .addNewEnv() .withName("LOG_LEVEL") - .withValue("DEBUG") + .withValue("INFO") .endEnv() .addNewEnv() .withName("MESSAGE_COUNT") @@ -329,7 +329,7 @@ public JobBuilder defaultConsumerStrimzi() { .endEnv() .addNewEnv() .withName("LOG_LEVEL") - .withValue("DEBUG") + .withValue("INFO") .endEnv() .addNewEnv() .withName("MESSAGE_COUNT") diff --git a/src/main/java/io/streams/constants/TestConstants.java b/src/main/java/io/streams/constants/TestConstants.java index 590a4dd..0894a8a 100644 --- a/src/main/java/io/streams/constants/TestConstants.java +++ b/src/main/java/io/streams/constants/TestConstants.java @@ -20,4 +20,8 @@ public interface TestConstants { String STRIMZI_TEST_CLIENTS_LABEL_KEY = "strimzi-test-clients"; String STRIMZI_TEST_CLIENTS_LABEL_VALUE = "true"; + + // Labels + String APP_POD_LABEL = "app"; + String DEPLOYMENT_TYPE = "deployment-type"; } diff --git a/src/main/java/io/streams/operands/minio/MinioInstaller.java b/src/main/java/io/streams/operands/minio/MinioInstaller.java new file mode 100644 index 0000000..3f8ac6c --- /dev/null +++ b/src/main/java/io/streams/operands/minio/MinioInstaller.java @@ -0,0 +1,142 @@ +/* + * Copyright streamshub authors. + * License: Apache License 2.0 (see the file LICENSE or http://apache.org/licenses/LICENSE-2.0.html). + */ +package io.streams.operands.minio; + +import io.fabric8.kubernetes.api.model.EnvVar; +import io.fabric8.kubernetes.api.model.IntOrString; +import io.fabric8.kubernetes.api.model.LabelSelector; +import io.fabric8.kubernetes.api.model.LabelSelectorBuilder; +import io.fabric8.kubernetes.api.model.Service; +import io.fabric8.kubernetes.api.model.ServiceBuilder; +import io.fabric8.kubernetes.api.model.VolumeBuilder; +import io.fabric8.kubernetes.api.model.VolumeMountBuilder; +import io.fabric8.kubernetes.api.model.apps.Deployment; +import io.fabric8.kubernetes.api.model.apps.DeploymentBuilder; +import io.skodjob.testframe.resources.KubeResourceManager; +import io.streams.constants.TestConstants; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.util.Map; + +public class MinioInstaller { + private static final Logger LOGGER = LogManager.getLogger(MinioInstaller.class); + + public static final String MINIO = "minio"; + public static final String ADMIN_CREDS = "minioadminLongerThan16BytesForFIPS"; + public static final String MINIO_STORAGE_ALIAS = "local"; + public static final int MINIO_PORT = 9000; + public static final int MINIO_CONSOLE_PORT = 9090; + private static final String MINIO_IMAGE = "quay.io/minio/minio:latest"; + + /** + * Deploy minio to a specific namespace, creates service for it and init client inside the Minio pod + * @param namespace where Minio will be installed to + */ + public static void deployMinio(String namespace) { + // Create a Minio deployment + Deployment minioDeployment = new DeploymentBuilder() + .withNewMetadata() + .withName(MINIO) + .withNamespace(namespace) + .withLabels(Map.of(TestConstants.DEPLOYMENT_TYPE, MINIO)) + .endMetadata() + .withNewSpec() + .withReplicas(1) + .withNewSelector() + .withMatchLabels(Map.of(TestConstants.APP_POD_LABEL, MINIO)) + .endSelector() + .withNewTemplate() + .withNewMetadata() + .withLabels(Map.of(TestConstants.APP_POD_LABEL, MINIO)) + .endMetadata() + .withNewSpec() + .addNewContainer() + .withName(MINIO) + .withImage(MINIO_IMAGE) + .withArgs("server", "/data", "--console-address", ":" + MINIO_CONSOLE_PORT) + .addToEnv(new EnvVar("MINIO_ROOT_USER", ADMIN_CREDS, null)) + .addToEnv(new EnvVar("MINIO_ROOT_PASSWORD", ADMIN_CREDS, null)) + .addNewPort() + .withContainerPort(MINIO_PORT) + .endPort() + .withVolumeMounts(new VolumeMountBuilder() + .withName("minio-storage") + .withMountPath("/data") + .build()) + .endContainer() + .withVolumes(new VolumeBuilder() + .withName("minio-storage") + .withNewEmptyDir() + .endEmptyDir() + .build()) + .endSpec() + .endTemplate() + .endSpec() + .build(); + + // Create the deployment + KubeResourceManager.getInstance().createResourceWithWait(minioDeployment); + + // Create a service to expose Minio + Service minioService = new ServiceBuilder() + .withNewMetadata() + .withName(MINIO) + .withNamespace(namespace) + .endMetadata() + .withNewSpec() + .withSelector(Map.of(TestConstants.APP_POD_LABEL, MINIO)) + .addNewPort() + .withName("api") + .withPort(MINIO_PORT) + .withTargetPort(new IntOrString(MINIO_PORT)) + .endPort() + .addNewPort() + .withName("console") + .withPort(MINIO_CONSOLE_PORT) + .withTargetPort(new IntOrString(MINIO_CONSOLE_PORT)) + .endPort() + .endSpec() + .build(); + + KubeResourceManager.getInstance().createResourceWithoutWait(minioService); + // NetworkPolicyResource.allowNetworkPolicyAllIngressForMatchingLabel(namespace, MINIO, Map.of(TestConstants.APP_POD_LABEL, MINIO)); + + initMinioClient(namespace); + } + + /** + * Init client inside the Minio pod. This allows other commands to be executed during the tests. + * @param namespace where Minio is installed + */ + private static void initMinioClient(String namespace) { + final LabelSelector labelSelector = new LabelSelectorBuilder().withMatchLabels(Map.of(TestConstants.APP_POD_LABEL, MINIO)).build(); + final String minioPod = KubeResourceManager.getKubeClient().listPods(namespace, labelSelector).get(0).getMetadata().getName(); + + KubeResourceManager.getKubeCmdClient().inNamespace(namespace).execInPod(minioPod, + "mc", + "config", + "host", + "add", + MINIO_STORAGE_ALIAS, + "http://localhost:" + MINIO_PORT, + ADMIN_CREDS, ADMIN_CREDS); + } + + /** + * Create bucket in Minio instance in specific namespace. + * @param namespace Minio location + * @param bucketName name of the bucket that will be created and used within the tests + */ + public static void createBucket(String namespace, String bucketName) { + final LabelSelector labelSelector = new LabelSelectorBuilder().withMatchLabels(Map.of(TestConstants.APP_POD_LABEL, MINIO)).build(); + final String minioPod = KubeResourceManager.getKubeClient().listPods(namespace, labelSelector).get(0).getMetadata().getName(); + + KubeResourceManager.getKubeCmdClient().inNamespace(namespace).execInPod(minioPod, + "mc", + "mb", + MINIO_STORAGE_ALIAS + "/" + bucketName); + } +} diff --git a/src/main/java/io/streams/operands/strimzi/templates/KafkaNodePoolTemplate.java b/src/main/java/io/streams/operands/strimzi/templates/KafkaNodePoolTemplate.java index 28c2079..6270225 100644 --- a/src/main/java/io/streams/operands/strimzi/templates/KafkaNodePoolTemplate.java +++ b/src/main/java/io/streams/operands/strimzi/templates/KafkaNodePoolTemplate.java @@ -27,7 +27,7 @@ public static KafkaNodePoolBuilder defaultKafkaNodePoolPvc(String namespace, Str .withReplicas(replicas) .addAllToRoles(roles) .withNewPersistentClaimStorage() - .withSize("1Gi") + .withSize("5Gi") .withDeleteClaim(true) .endPersistentClaimStorage() .endSpec(); @@ -46,7 +46,7 @@ public static KafkaNodePoolBuilder defaultKafkaNodePoolJbod(String namespace, St .addAllToRoles(roles) .withStorage( new JbodStorageBuilder().addToVolumes( - new PersistentClaimStorageBuilder().withId(0).withSize("1Gi").withDeleteClaim(true) + new PersistentClaimStorageBuilder().withId(0).withSize("5Gi").withDeleteClaim(true) .withKraftMetadata(KRaftMetadataStorage.SHARED).build()) .build()) .endSpec(); diff --git a/src/main/java/io/streams/sql/TestStatements.java b/src/main/java/io/streams/sql/TestStatements.java index 72fb0df..d983097 100644 --- a/src/main/java/io/streams/sql/TestStatements.java +++ b/src/main/java/io/streams/sql/TestStatements.java @@ -177,7 +177,8 @@ public static String getTestFlinkFilter(String bootstrap, String registryUrl, St additionalProperties.put("properties.group.id", "flink-filter-group"); additionalProperties.put("value.format", "avro-confluent"); additionalProperties.put("value.avro-confluent.url", registryUrl); - additionalProperties.put("scan.startup.mode", "latest-offset"); + // Startup mode for Kafka consumer, we set earliest to catch even previously sent messages + additionalProperties.put("scan.startup.mode", "earliest-offset"); additionalProperties.put("properties.security.protocol", "SASL_PLAINTEXT"); additionalProperties.put("properties.sasl.mechanism", "SCRAM-SHA-512"); additionalProperties.put("properties.sasl.jaas.config", @@ -239,7 +240,8 @@ public static String getWrongConnectionSql() { additionalProperties.put("properties.group.id", "flink-filter-group"); additionalProperties.put("value.format", "avro-confluent"); additionalProperties.put("value.avro-confluent.url", "not-exists-sr.cluster.local:5001"); - additionalProperties.put("scan.startup.mode", "latest-offset"); + // Startup mode for Kafka consumer, we set earliest to catch even previously sent messages + additionalProperties.put("scan.startup.mode", "earliest-offset"); SqlWith sqlWith = new SqlWithBuilder() .withSqlStatement(builder.toString()) diff --git a/src/main/java/io/streams/utils/MinioUtils.java b/src/main/java/io/streams/utils/MinioUtils.java new file mode 100644 index 0000000..4f7437a --- /dev/null +++ b/src/main/java/io/streams/utils/MinioUtils.java @@ -0,0 +1,147 @@ +/* + * Copyright streamshub authors. + * License: Apache License 2.0 (see the file LICENSE or http://apache.org/licenses/LICENSE-2.0.html). + */ +package io.streams.utils; + +import io.fabric8.kubernetes.api.model.LabelSelector; +import io.fabric8.kubernetes.api.model.LabelSelectorBuilder; +import io.skodjob.testframe.TestFrameConstants; +import io.skodjob.testframe.resources.KubeResourceManager; +import io.skodjob.testframe.wait.Wait; +import io.streams.constants.TestConstants; +import io.streams.operands.minio.MinioInstaller; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class MinioUtils { + private static final Logger LOGGER = LogManager.getLogger(MinioUtils.class); + + private MinioUtils() { + + } + + /** + * Collect data from Minio about usage of a specific bucket + * + * @param namespace Name of the Namespace where the Minio Pod is running + * @param bucketName Name of the bucket for which we want to get info about its size + * @return Overall statistics about the bucket in String format + */ + public static String getBucketSizeInfo(String namespace, String bucketName) { + final LabelSelector labelSelector = new LabelSelectorBuilder() + .withMatchLabels(Map.of(TestConstants.APP_POD_LABEL, MinioInstaller.MINIO)) + .build(); + final String minioPod = KubeResourceManager.getKubeClient() + .listPods(namespace, labelSelector) + .get(0) + .getMetadata() + .getName(); + + return KubeResourceManager.getKubeCmdClient() + .inNamespace(namespace) + .execInPod(minioPod, + "mc", + "stat", + "local/" + bucketName) + .out(); + } + + /** + * Parse out total size of bucket from the information about usage. + * + * @param bucketInfo String containing all stat info about bucket + * @return Map consists of parsed size and it's unit + */ + private static Map parseTotalSize(String bucketInfo) { + Pattern pattern = Pattern.compile("Total size:\\s*(?[\\d.]+)\\s*(?.*)"); + Matcher matcher = pattern.matcher(bucketInfo); + + if (matcher.find()) { + return Map.of("size", Double.parseDouble(matcher.group("size")), "unit", matcher.group("unit")); + } else { + throw new IllegalArgumentException("Total size not found in the provided string"); + } + } + + /** + * Parse out total size of bucket from the information about usage. + * + * @param bucketInfo String containing all stat info about bucket + * @return Object counts in the bucket + */ + private static int parseObjectCount(String bucketInfo) { + Pattern pattern = Pattern.compile("Objects count:\\s*(?[\\d.]+)"); + Matcher matcher = pattern.matcher(bucketInfo); + + if (matcher.find()) { + return Integer.parseInt(matcher.group("count")); + } else { + throw new IllegalArgumentException("Objects count not found in the provided string"); + } + } + + /** + * Wait until size of the bucket is not 0 B. + * + * @param namespace Minio location + * @param bucketName bucket name + */ + public static void waitForDataInMinio(String namespace, String bucketName) { + Wait.until("data sync from Kafka to Minio", + TestFrameConstants.GLOBAL_POLL_INTERVAL_MEDIUM, + TestFrameConstants.GLOBAL_TIMEOUT, + () -> { + String bucketSizeInfo = getBucketSizeInfo(namespace, bucketName); + Map parsedSize = parseTotalSize(bucketSizeInfo); + double bucketSize = (Double) parsedSize.get("size"); + LOGGER.info("Collected bucket size: {} {}", bucketSize, parsedSize.get("unit")); + LOGGER.debug("Collected bucket info:\n{}", bucketSizeInfo); + + return bucketSize > 0; + }); + } + + /** + * Wait until size of the bucket is not 0 B. + * + * @param namespace Minio location + * @param bucketName bucket name + */ + public static void waitForObjectsInMinio(String namespace, String bucketName) { + Wait.until("data sync from Kafka to Minio", + TestFrameConstants.GLOBAL_POLL_INTERVAL_MEDIUM, + TestFrameConstants.GLOBAL_TIMEOUT, + () -> { + String bucketSizeInfo = getBucketSizeInfo(namespace, bucketName); + int objectCount = parseObjectCount(bucketSizeInfo); + LOGGER.info("Collected object count: {}", objectCount); + LOGGER.debug("Collected bucket info:\n{}", bucketSizeInfo); + + return objectCount > 0; + }); + } + + + /** + * Wait until bucket is empty. + * + * @param namespace Minio location + * @param bucketName bucket name + */ + public static void waitForNoDataInMinio(String namespace, String bucketName) { + Wait.until("data deletion in Minio", TestFrameConstants.GLOBAL_POLL_INTERVAL_MEDIUM, TestFrameConstants.GLOBAL_TIMEOUT, () -> { + String bucketSizeInfo = getBucketSizeInfo(namespace, bucketName); + Map parsedSize = parseTotalSize(bucketSizeInfo); + double bucketSize = (Double) parsedSize.get("size"); + LOGGER.info("Collected bucket size: {} {}", bucketSize, parsedSize.get("unit")); + LOGGER.debug("Collected bucket info:\n{}", bucketSizeInfo); + + return bucketSize == 0; + }); + } +} diff --git a/src/test/java/io/streams/e2e/flink/sql/SqlJobRunnerST.java b/src/test/java/io/streams/e2e/flink/sql/SqlJobRunnerST.java index 7d247c7..2405fe2 100644 --- a/src/test/java/io/streams/e2e/flink/sql/SqlJobRunnerST.java +++ b/src/test/java/io/streams/e2e/flink/sql/SqlJobRunnerST.java @@ -24,6 +24,7 @@ import io.streams.operands.flink.resoruces.FlinkDeploymentType; import io.streams.operands.flink.templates.FlinkDeploymentTemplate; import io.streams.operands.flink.templates.FlinkRBAC; +import io.streams.operands.minio.MinioInstaller; import io.streams.operands.strimzi.resources.KafkaType; import io.streams.operands.strimzi.templates.KafkaNodePoolTemplate; import io.streams.operands.strimzi.templates.KafkaTemplate; @@ -31,6 +32,7 @@ import io.streams.operators.EOperator; import io.streams.operators.OperatorInstaller; import io.streams.sql.TestStatements; +import io.streams.utils.MinioUtils; import io.streams.utils.StrimziClientUtils; import io.streams.utils.TestUtils; import io.streams.utils.kube.JobUtils; @@ -44,6 +46,7 @@ import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; +import java.util.HashMap; import java.util.List; import java.util.Map; @@ -367,6 +370,7 @@ void testWrongConnectionInfo() { @Test void testFRocksDbStateBackend() { String namespace = "flink-state-backend"; + String flinkDeploymentName = namespace; String kafkaUser = "test-user"; // Create namespace KubeResourceManager.getInstance().createOrUpdateResourceWithWait( @@ -468,7 +472,7 @@ void testFRocksDbStateBackend() { // Deploy flink with test filter sql statement which filter to specific topic only payment type paypal // Modify flink default deployment with state backend and pvc configuration FlinkDeployment flink = FlinkDeploymentTemplate.defaultFlinkDeployment(namespace, - "flink-state-backend", List.of(TestStatements.getTestFlinkFilter( + flinkDeploymentName, List.of(TestStatements.getTestFlinkFilter( bootstrapServerAuth, registryUrl, kafkaUser, namespace))) .editSpec() .addToFlinkConfiguration( @@ -509,7 +513,7 @@ void testFRocksDbStateBackend() { Wait.until("Task manager contains info about rocksdb", TestFrameConstants.GLOBAL_POLL_INTERVAL_LONG, TestFrameConstants.GLOBAL_TIMEOUT, () -> { List taskManagerPods = KubeResourceManager.getKubeClient() - .listPodsByPrefixInName(namespace, namespace + "-taskmanager"); + .listPodsByPrefixInName(namespace, flinkDeploymentName + "-taskmanager"); for (Pod p : taskManagerPods) { return KubeResourceManager.getKubeClient().getLogsFromPod(namespace, p.getMetadata().getName()) .contains("State backend loader loads the state backend as EmbeddedRocksDBStateBackend"); @@ -544,4 +548,276 @@ void testFRocksDbStateBackend() { assertTrue(log.contains("\"type\":\"paypal\"")); assertFalse(log.contains("\"type\":\"creditCard\"")); } + + @TestDoc( + description = @Desc("Test verifies that user can use S3 as state backend"), + steps = { + @Step(value = "Create namespace, serviceaccount and roles for Flink", expected = "Resources created"), + @Step(value = "Deploy Apicurio registry", expected = "Apicurio registry is up and running"), + @Step(value = "Deploy Kafka my-cluster with scram-sha auth", expected = "Kafka is up and running"), + @Step(value = "Create KafkaUser with scram-sha secret", expected = "KafkaUser created"), + @Step(value = "Deploy strimzi-kafka-clients producer with payment data generator", + expected = "Client job is created and data are sent to flink.payment.data topic"), + @Step(value = "Deploy Minio for S3 service", expected = "Minio is up and running"), + @Step(value = "Deploy FlinkDeployment with sql which gets data from flink.payment.data topic filter " + + "payment of type paypal and send data to flink.payment.paypal topic, for authentication is used " + + "secret created by KafkaUser and this secret is passed into by secret interpolation. Flink is " + + "configured to use S3 as a state backend", + expected = "FlinkDeployment is up and tasks are deployed and it sends filtered " + + "data into flink.payment.paypal topic, task manager deployed by FlinkDeployment uses " + + "S3"), + @Step(value = "Deploy strimzi-kafka-clients consumer as job and consume messages from" + + "kafka topic flink.payment.paypal", + expected = "Consumer is deployed and it consumes messages"), + @Step(value = "Verify that messages are present", expected = "Messages are present"), + @Step(value = "Verify that taskmanager logs contains 'State backend loader loads the state " + + "backend as HashMapStateBackend'", expected = "Log message is present"), + @Step(value = "Verify that Minio contains some data from Flink", expected = "Flink bucket is not empty") + }, + labels = { + @Label(value = FLINK_SQL_RUNNER), + @Label(value = FLINK), + } + ) + @Test + void testS3StateBackend() { + String namespace = "flink-s3-state-backend"; + String flinkDeploymentName = "flink-state-backend"; + String kafkaUser = "test-user"; + String bucketName = "flink-bucket"; + + // Create namespace + KubeResourceManager.getInstance() + .createOrUpdateResourceWithWait( + new NamespaceBuilder().withNewMetadata() + .withName(namespace) + .endMetadata() + .build()); + + // Add flink RBAC + KubeResourceManager.getInstance() + .createOrUpdateResourceWithWait( + FlinkRBAC.getFlinkRbacResources(namespace) + .toArray(new HasMetadata[0])); + + // Create kafka + KubeResourceManager.getInstance() + .createOrUpdateResourceWithWait( + KafkaNodePoolTemplate.defaultKafkaNodePoolJbod(namespace, "dual-role", + 3, kafkaClusterName, List.of(ProcessRoles.BROKER, ProcessRoles.CONTROLLER)) + .build()); + + KubeResourceManager.getInstance() + .createOrUpdateResourceWithWait( + KafkaTemplate.defaultKafka(namespace, kafkaClusterName) + .editSpec() + .editKafka() + .withListeners( + new GenericKafkaListenerBuilder() + .withName("plain") + .withTls(false) + .withType(KafkaListenerType.INTERNAL) + .withPort((9092)) + .withAuth(new KafkaListenerAuthenticationScramSha512()) + .build(), + new GenericKafkaListenerBuilder() + .withName("unsecure") + .withTls(false) + .withType(KafkaListenerType.INTERNAL) + .withPort((9094)) + .build() + ) + .endKafka() + .endSpec() + .build()); + + // Create topic for ksql apicurio + KubeResourceManager.getInstance() + .createOrUpdateResourceWithWait( + ApicurioRegistryTemplate.apicurioKsqlTopic(namespace, kafkaClusterName, 3)); + + // Create kafka scram sha user + KubeResourceManager.getInstance() + .createOrUpdateResourceWithWait( + KafkaUserTemplate.defaultKafkaUser(namespace, kafkaUser, kafkaClusterName) + .editSpec() + .withAuthentication(new KafkaUserScramSha512ClientAuthentication()) + .endSpec() + .build()); + + String bootstrapServerAuth = KafkaType.kafkaClient() + .inNamespace(namespace) + .withName(kafkaClusterName) + .get() + .getStatus() + .getListeners() + .stream() + .filter(l -> l.getName() + .equals("plain")) + .findFirst() + .get() + .getBootstrapServers(); + String bootstrapServerUnsecure = KafkaType.kafkaClient() + .inNamespace(namespace) + .withName(kafkaClusterName) + .get() + .getStatus() + .getListeners() + .stream() + .filter(l -> l.getName() + .equals("unsecure")) + .findFirst() + .get() + .getBootstrapServers(); + + // Add apicurio + KubeResourceManager.getInstance() + .createOrUpdateResourceWithWait( + ApicurioRegistryTemplate.defaultApicurioRegistry("apicurio-registry", namespace, + bootstrapServerUnsecure) + .build()); + + // Get user secret jaas configuration + final String saslJaasConfigEncrypted = KubeResourceManager.getKubeClient() + .getClient() + .secrets() + .inNamespace(namespace) + .withName(kafkaUser) + .get() + .getData() + .get("sasl.jaas.config"); + final String saslJaasConfigDecrypted = TestUtils.decodeFromBase64(saslJaasConfigEncrypted); + + // Run internal producer and produce data + String producerName = "kafka-producer"; + StrimziKafkaClients kafkaProducerClient = new StrimziKafkaClientsBuilder() + .withProducerName(producerName) + .withNamespaceName(namespace) + .withTopicName("flink.payment.data") + .withBootstrapAddress(bootstrapServerAuth) + .withMessageCount(30000) + .withUsername(kafkaUser) + .withDelayMs(1) + .withMessageTemplate("payment_fiat") + .withAdditionalConfig( + StrimziClientUtils.getApicurioAdditionalProperties(AvroKafkaSerializer.class.getName(), + "http://apicurio-registry-service." + namespace + ".svc:8080/apis/registry/v2") + "\n" + + "sasl.mechanism=SCRAM-SHA-512\n" + + "security.protocol=SASL_PLAINTEXT\n" + + "sasl.jaas.config=" + saslJaasConfigDecrypted + ) + .build(); + + KubeResourceManager.getInstance() + .createResourceWithWait( + kafkaProducerClient.producerStrimzi() + ); + + String registryUrl = "http://apicurio-registry-service." + namespace + ".svc:8080/apis/ccompat/v6"; + + // Add Minio + MinioInstaller.deployMinio(namespace); + MinioInstaller.createBucket(namespace, bucketName); + + // Deploy flink with test filter sql statement which filter to specific topic only payment type paypal + // Modify flink default deployment with state backend and pvc configuration + HashMap flinkConfig = new HashMap(); + flinkConfig.put("execution.checkpointing.interval", "10000"); + flinkConfig.put("execution.checkpointing.snapshot-compression", "true"); + flinkConfig.put("kubernetes.operator.job.restart.failed", "true"); + // rocksdb can be used as a state backend but the location is referenced in s3 instead on local pvc + flinkConfig.put("state.backend", "rocksdb"); + flinkConfig.put("state.checkpoints.dir", "s3://" + bucketName + "/" + MinioInstaller.MINIO + ":" + MinioInstaller.MINIO_PORT); + flinkConfig.put("state.savepoints.dir", "s3://" + bucketName + "/" + MinioInstaller.MINIO + ":" + MinioInstaller.MINIO_PORT); + // Currently Minio is deployed only in HTTP mode so we need to specify http in the url + flinkConfig.put("s3.endpoint", "http://" + MinioInstaller.MINIO + ":" + MinioInstaller.MINIO_PORT); + // This should be set to make sure Flink will properly work with Minio + flinkConfig.put("s3.path.style.access", "true"); + flinkConfig.put("s3.access-key", MinioInstaller.ADMIN_CREDS); + flinkConfig.put("s3.secret-key", MinioInstaller.ADMIN_CREDS); + + FlinkDeployment flink = FlinkDeploymentTemplate.defaultFlinkDeployment(namespace, + flinkDeploymentName, List.of(TestStatements.getTestFlinkFilter( + bootstrapServerAuth, registryUrl, kafkaUser, namespace))) + .editSpec() + .addToFlinkConfiguration( + flinkConfig + ) + .editPodTemplate() + .editOrNewSpec() + .editFirstContainer() + .endFlinkdeploymentspecContainer() + .endFlinkdeploymentspecSpec() + .endPodTemplate() + .endSpec() + .build(); + KubeResourceManager.getInstance() + .createOrUpdateResourceWithWait(flink); + + JobUtils.waitForJobSuccess(namespace, kafkaProducerClient.getProducerName(), + TestFrameConstants.GLOBAL_TIMEOUT_MEDIUM); + + //Check task manager log for presence checkpoint configuration + Wait.until("Task manager contains info about state.backend", TestFrameConstants.GLOBAL_POLL_INTERVAL_LONG, + TestFrameConstants.GLOBAL_TIMEOUT, () -> { + List taskManagerPods = KubeResourceManager.getKubeClient() + .listPodsByPrefixInName(namespace, flinkDeploymentName + "-taskmanager"); + for (Pod p : taskManagerPods) { + return KubeResourceManager.getKubeClient() + .getLogsFromPod(namespace, p.getMetadata() + .getName()) + .contains("State backend loader loads the state backend as EmbeddedRocksDBStateBackend"); + } + return false; + }); + + // Run consumer and check if data are filtered + String consumerName = "kafka-consumer"; + StrimziKafkaClients kafkaConsumerClient = new StrimziKafkaClientsBuilder() + .withConsumerName(consumerName) + .withNamespaceName(namespace) + .withTopicName("flink.payment.paypal") + .withBootstrapAddress(bootstrapServerAuth) + .withMessageCount(100) + .withAdditionalConfig( + "sasl.mechanism=SCRAM-SHA-512\n" + + "security.protocol=SASL_PLAINTEXT\n" + + "sasl.jaas.config=" + saslJaasConfigDecrypted + ) + .withConsumerGroup("flink-filter-test-group") + .build(); + + KubeResourceManager.getInstance() + .createResourceWithWait( + kafkaConsumerClient.consumerStrimzi() + ); + + JobUtils.waitForJobSuccess(namespace, kafkaConsumerClient.getConsumerName(), + TestFrameConstants.GLOBAL_TIMEOUT_MEDIUM); + String consumerPodName = KubeResourceManager.getKubeClient() + .listPodsByPrefixInName(namespace, consumerName) + .get(0) + .getMetadata() + .getName(); + String log = KubeResourceManager.getKubeClient() + .getLogsFromPod(namespace, consumerPodName); + assertTrue(log.contains("\"type\":\"paypal\"")); + assertFalse(log.contains("\"type\":\"creditCard\"")); + + MinioUtils.waitForObjectsInMinio(namespace, bucketName); + String flinkDeploymentPodName = KubeResourceManager.getKubeClient() + .listPodsByPrefixInName(namespace, flinkDeploymentName) + .stream() + .filter(pod -> !pod.getMetadata() + .getName() + .contains("taskmanager")) + .toList() + .get(0) + .getMetadata() + .getName(); + + log = KubeResourceManager.getKubeClient().getLogsFromPod(namespace, flinkDeploymentPodName); + assertTrue(log.contains("Committing minio:9000")); + assertTrue(log.contains("Marking checkpoint 1 as completed for source Source: payment_fiat")); + } }