From 2cc8506413c31a6543b62bd63b58cb8f1eca78fd Mon Sep 17 00:00:00 2001 From: Jukka Karvanen <48978068+jukkakarvanen@users.noreply.github.com> Date: Thu, 28 Mar 2019 09:00:43 +0200 Subject: [PATCH 01/20] Added gitignore --- .gitignore | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..108e0cc --- /dev/null +++ b/.gitignore @@ -0,0 +1,16 @@ +# Eclipse +.classpath +.project +.settings/ + +# Intellij +.idea/ +*.iml +*.iws + +# Mac +.DS_Store + +# Maven +log/ +target/ \ No newline at end of file From 9948d9245393d2f7a8fa6e49269f248da131547c Mon Sep 17 00:00:00 2001 From: Jukka Karvanen <48978068+jukkakarvanen@users.noreply.github.com> Date: Thu, 28 Mar 2019 09:16:18 +0200 Subject: [PATCH 02/20] JUnit workaround due to KAFKA-6647 causing test to fail in Windows --- .../utils/TestEmbeddedKafkaCluster.java | 45 ++++++++++++++++++ .../integration/utils/TestKafkaStreams.java | 47 +++++++++++++++++++ ...L4J_DeepLearning_Iris_IntegrationTest.java | 3 +- ..._DeepLearning_Example_IntegrationTest.java | 3 +- ...rning_H2O_GBM_Example_IntegrationTest.java | 3 +- ...e_Recognition_Example_IntegrationTest.java | 6 ++- ...sorFlow_Keras_Example_IntegrationTest.java | 6 ++- 7 files changed, 106 insertions(+), 7 deletions(-) create mode 100644 src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java create mode 100644 src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java diff --git a/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java b/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java new file mode 100644 index 0000000..d3c7b18 --- /dev/null +++ b/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java @@ -0,0 +1,45 @@ +package 
com.github.jukkakarvanen.kafka.streams.integration.utils; + +import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Properties; + +/** This is helper class to workaround for Failing stream tests in Windows environment KAFKA-6647. + * + * @author Jukka Karvanen + * + * The causing issue is https://issues.apache.org/jira/browse/KAFKA-6647 + * Replacing EmbeddedKafkaCluster with TestEmbeddedKafkaCluster will catch and ignore the exception + * happening during the tear down of the test + * The exception does not have affect to functionality + */ + +public class TestEmbeddedKafkaCluster extends EmbeddedKafkaCluster { + private static final Logger log = LoggerFactory.getLogger(TestEmbeddedKafkaCluster.class); + + public TestEmbeddedKafkaCluster(int numBrokers) { + super(numBrokers); + } + + public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig) { + super(numBrokers, brokerConfig); + } + + public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig, long mockTimeMillisStart) { + super(numBrokers, brokerConfig, mockTimeMillisStart); + } + + public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig, long mockTimeMillisStart, long mockTimeNanoStart) { + super(numBrokers, brokerConfig, mockTimeMillisStart, mockTimeNanoStart); + } + + public void after() { + try { + super.after(); + } catch (RuntimeException e) { + log.warn("Ignoring exception, test failing in Windows due this exception {}", e); + } + } +} diff --git a/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java b/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java new file mode 100644 index 0000000..ef2bc4e --- /dev/null +++ b/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java @@ -0,0 +1,47 @@ +package com.github.jukkakarvanen.kafka.streams.integration.utils; 
+ +import org.apache.kafka.common.utils.Time; +import org.apache.kafka.streams.KafkaClientSupplier; +import org.apache.kafka.streams.KafkaStreams; +import org.apache.kafka.streams.Topology; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Properties; + +/** This is helper class to workaround for Failing stream tests in Windows environment KAFKA-6647. + * + * @author Jukka Karvanen + * + * The causing issue is https://issues.apache.org/jira/browse/KAFKA-6647 + * Replacing KafkaStreams with TestKafkaStreams will catch and ignore the exception caused by cleanUp + * The exception does not have affect to functionality + */ + +public class TestKafkaStreams extends KafkaStreams { + private static final Logger log = LoggerFactory.getLogger(TestKafkaStreams.class); + + public TestKafkaStreams(Topology topology, Properties props) { + super(topology, props); + } + + public TestKafkaStreams(Topology topology, Properties props, KafkaClientSupplier clientSupplier) { + super(topology, props, clientSupplier); + } + + public TestKafkaStreams(Topology topology, Properties props, Time time) { + super(topology, props, time); + } + + public TestKafkaStreams(Topology topology, Properties props, KafkaClientSupplier clientSupplier, Time time) { + super(topology, props, clientSupplier, time); + } + + public void cleanUp() { + try { + super.cleanUp(); + } catch (RuntimeException e) { + log.warn("Ignoring exception, test failing in Windows due this exception {}", e); + } + } +} diff --git a/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java b/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java index 33c1149..9b17b35 100644 --- a/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java +++ 
b/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java @@ -8,6 +8,7 @@ import java.util.Properties; import java.util.stream.Stream; +import com.github.jukkakarvanen.kafka.streams.integration.utils.TestEmbeddedKafkaCluster; import org.apache.kafka.clients.consumer.ConsumerConfig; import org.apache.kafka.clients.producer.ProducerConfig; import org.apache.kafka.common.serialization.Serdes; @@ -43,7 +44,7 @@ public class Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest { @ClassRule - public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(1); + public static final EmbeddedKafkaCluster CLUSTER = new TestEmbeddedKafkaCluster(1); private static final String inputTopic = "IrisInputTopic"; private static final String outputTopic = "IrisOutputTopic"; diff --git a/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java b/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java index 6381cd9..01a07c8 100644 --- a/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java +++ b/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java @@ -6,6 +6,7 @@ import java.util.List; import java.util.Properties; +import com.github.jukkakarvanen.kafka.streams.integration.utils.TestEmbeddedKafkaCluster; import org.apache.kafka.clients.consumer.ConsumerConfig; import org.apache.kafka.clients.producer.ProducerConfig; import org.apache.kafka.common.serialization.Serdes; @@ -42,7 +43,7 @@ public class Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest { @ClassRule - public static final 
EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(1); + public static final EmbeddedKafkaCluster CLUSTER = new TestEmbeddedKafkaCluster(1); private static final String inputTopic = "AirlineInputTopic"; private static final String outputTopic = "AirlineOutputTopic"; diff --git a/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java b/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java index 4cd837d..5e23ee6 100644 --- a/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java +++ b/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java @@ -6,6 +6,7 @@ import java.util.List; import java.util.Properties; +import com.github.jukkakarvanen.kafka.streams.integration.utils.TestEmbeddedKafkaCluster; import org.apache.kafka.clients.consumer.ConsumerConfig; import org.apache.kafka.clients.producer.ProducerConfig; import org.apache.kafka.common.serialization.Serdes; @@ -48,7 +49,7 @@ public class Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest { @ClassRule // public static final EmbeddedSingleNodeKafkaCluster CLUSTER = new EmbeddedSingleNodeKafkaCluster(); - public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(1); + public static final EmbeddedKafkaCluster CLUSTER = new TestEmbeddedKafkaCluster(1); private static final String inputTopic = "AirlineInputTopic"; private static final String outputTopic = "AirlineOutputTopic"; diff --git a/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java 
b/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java index 835041c..e58e7dc 100644 --- a/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java +++ b/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java @@ -11,6 +11,8 @@ import java.util.List; import java.util.Properties; +import com.github.jukkakarvanen.kafka.streams.integration.utils.TestEmbeddedKafkaCluster; +import com.github.jukkakarvanen.kafka.streams.integration.utils.TestKafkaStreams; import org.apache.kafka.clients.consumer.ConsumerConfig; import org.apache.kafka.clients.producer.ProducerConfig; import org.apache.kafka.common.serialization.Serdes; @@ -48,7 +50,7 @@ public class Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest { @ClassRule - public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(1); + public static final EmbeddedKafkaCluster CLUSTER = new TestEmbeddedKafkaCluster(1); private static final String inputTopic = "ImageInputTopic"; private static final String outputTopic = "ImageOutputTopic"; @@ -144,7 +146,7 @@ public void shouldRecognizeImages() throws Exception { // Start Kafka Streams Application to process new incoming messages from // Input Topic - final KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration); + final KafkaStreams streams = new TestKafkaStreams(builder.build(), streamsConfiguration); streams.cleanUp(); streams.start(); System.out.println("Image Recognition Microservice is running..."); diff --git a/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java 
b/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java index 2b09fe6..148bc24 100644 --- a/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java +++ b/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java @@ -6,6 +6,8 @@ import java.util.List; import java.util.Properties; +import com.github.jukkakarvanen.kafka.streams.integration.utils.TestEmbeddedKafkaCluster; +import com.github.jukkakarvanen.kafka.streams.integration.utils.TestKafkaStreams; import org.apache.kafka.clients.consumer.ConsumerConfig; import org.apache.kafka.clients.producer.ProducerConfig; import org.apache.kafka.common.serialization.Serdes; @@ -43,7 +45,7 @@ public class Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest { @ClassRule - public static final EmbeddedKafkaCluster CLUSTER = new EmbeddedKafkaCluster(1); + public static final EmbeddedKafkaCluster CLUSTER = new TestEmbeddedKafkaCluster(1); private static final String inputTopic = "InputTopic"; private static final String outputTopic = "OutputTopic"; @@ -120,7 +122,7 @@ public void shouldPredictValues() throws Exception { // Start Kafka Streams Application to process new incoming messages from // Input Topic - final KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration); + final KafkaStreams streams = new TestKafkaStreams(builder.build(), streamsConfiguration); streams.cleanUp(); streams.start(); System.out.println("Prediction Microservice is running..."); From 06637f0bab874cb8ff0d91cca19e727148e8f7fa Mon Sep 17 00:00:00 2001 From: Jukka Karvanen <48978068+jukkakarvanen@users.noreply.github.com> Date: Thu, 28 Mar 2019 16:55:31 +0200 Subject: [PATCH 03/20] Examples split to submodules --- DL4J_DeepLearning_Iris/pom.xml | 180 +++++++++++++++++ DL4J_DeepLearning_Iris/readme.md | 168 
++++++++++++++++ .../models/DeepLearning4J_CSV_Iris_Model.java | 0 .../DeepLearning4J_CSV_Model_Inference.java | 0 .../main/resources/DL4J_Resources/iris.txt | 0 .../generatedModels/DL4J/DL4J_Iris_Model.zip | Bin .../src/main/resources/log4j.properties | 5 + .../utils/TestEmbeddedKafkaCluster.java | 45 +++++ .../integration/utils/TestKafkaStreams.java | 47 +++++ ...L4J_DeepLearning_Iris_IntegrationTest.java | 0 H2O_DeepLearning/pom.xml | 180 +++++++++++++++++ H2O_DeepLearning/readme.md | 168 ++++++++++++++++ ...hineLearning_H2O_DeepLearning_Example.java | 0 ...reams_MachineLearning_H2O_GBM_Example.java | 0 ...neLearning_H2O_GBM_Example_Refactored.java | 0 ..._fe7c1f02_08ec_4070_b784_c2531147e451.java | 0 .../machinelearning/models/gbm_pojo_test.java | 0 ...DeepWater_model_python_1503570558230_1.zip | Bin .../GBM_model_python_1503397740678_1.zip | Bin .../src/main/resources/log4j.properties | 5 + .../utils/TestEmbeddedKafkaCluster.java | 45 +++++ .../integration/utils/TestKafkaStreams.java | 47 +++++ ..._DeepLearning_Example_IntegrationTest.java | 0 ...rning_H2O_GBM_Example_IntegrationTest.java | 0 TensorFlow_Image_Recognition/pom.xml | 181 ++++++++++++++++++ TensorFlow_Image_Recognition/readme.md | 168 ++++++++++++++++ ..._TensorFlow_Image_Recognition_Example.java | 0 .../resources/TensorFlow_Images/devil.png | Bin .../TensorFlow_Images/new_airplane.jpg | Bin .../TensorFlow_Images/trained_airplane_1.jpg | Bin .../TensorFlow_Images/trained_airplane_2.jpg | Bin .../TensorFlow_Images/trained_butterfly.jpg | Bin .../generatedModels/CNN_inception5h/LICENSE | 0 .../imagenet_comp_graph_label_strings.txt | 0 .../tensorflow_inception_graph.pb | Bin .../TensorFlow_Census/saved_model.pb | Bin .../TensorFlow_Census/test.json | 0 .../variables/variables.data-00000-of-00001 | Bin .../variables/variables.index | Bin .../src/main/resources/log4j.properties | 5 + .../utils/TestEmbeddedKafkaCluster.java | 45 +++++ .../integration/utils/TestKafkaStreams.java | 47 +++++ 
...e_Recognition_Example_IntegrationTest.java | 0 TensorFlow_Keras/pom.xml | 180 +++++++++++++++++ TensorFlow_Keras/readme.md | 168 ++++++++++++++++ .../Keras/keras-model-script.py | 0 .../generatedModels/Keras/simple_mlp.h5 | Bin .../src/main/resources/log4j.properties | 5 + .../utils/TestEmbeddedKafkaCluster.java | 45 +++++ .../integration/utils/TestKafkaStreams.java | 47 +++++ ...sorFlow_Keras_Example_IntegrationTest.java | 0 51 files changed, 1781 insertions(+) create mode 100644 DL4J_DeepLearning_Iris/pom.xml create mode 100644 DL4J_DeepLearning_Iris/readme.md rename {src => DL4J_DeepLearning_Iris/src}/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Iris_Model.java (100%) rename {src => DL4J_DeepLearning_Iris/src}/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Model_Inference.java (100%) rename {src => DL4J_DeepLearning_Iris/src}/main/resources/DL4J_Resources/iris.txt (100%) rename {src => DL4J_DeepLearning_Iris/src}/main/resources/generatedModels/DL4J/DL4J_Iris_Model.zip (100%) create mode 100644 DL4J_DeepLearning_Iris/src/main/resources/log4j.properties create mode 100644 DL4J_DeepLearning_Iris/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java create mode 100644 DL4J_DeepLearning_Iris/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java rename {src => DL4J_DeepLearning_Iris/src}/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java (100%) create mode 100644 H2O_DeepLearning/pom.xml create mode 100644 H2O_DeepLearning/readme.md rename {src => H2O_DeepLearning/src}/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java (100%) rename {src => 
H2O_DeepLearning/src}/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java (100%) rename {src => H2O_DeepLearning/src}/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example_Refactored.java (100%) rename {src => H2O_DeepLearning/src}/main/java/com/github/megachucky/kafka/streams/machinelearning/models/deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java (100%) rename {src => H2O_DeepLearning/src}/main/java/com/github/megachucky/kafka/streams/machinelearning/models/gbm_pojo_test.java (100%) rename {src => H2O_DeepLearning/src}/main/resources/generatedModels/DeepWater_model_python_1503570558230_1.zip (100%) rename {src => H2O_DeepLearning/src}/main/resources/generatedModels/GBM_model_python_1503397740678_1.zip (100%) create mode 100644 H2O_DeepLearning/src/main/resources/log4j.properties create mode 100644 H2O_DeepLearning/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java create mode 100644 H2O_DeepLearning/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java rename {src => H2O_DeepLearning/src}/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java (100%) rename {src => H2O_DeepLearning/src}/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java (100%) create mode 100644 TensorFlow_Image_Recognition/pom.xml create mode 100644 TensorFlow_Image_Recognition/readme.md rename {src => TensorFlow_Image_Recognition/src}/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_Example.java (100%) rename {src => TensorFlow_Image_Recognition/src}/main/resources/TensorFlow_Images/devil.png (100%) rename {src => 
TensorFlow_Image_Recognition/src}/main/resources/TensorFlow_Images/new_airplane.jpg (100%) rename {src => TensorFlow_Image_Recognition/src}/main/resources/TensorFlow_Images/trained_airplane_1.jpg (100%) rename {src => TensorFlow_Image_Recognition/src}/main/resources/TensorFlow_Images/trained_airplane_2.jpg (100%) rename {src => TensorFlow_Image_Recognition/src}/main/resources/TensorFlow_Images/trained_butterfly.jpg (100%) rename {src => TensorFlow_Image_Recognition/src}/main/resources/generatedModels/CNN_inception5h/LICENSE (100%) rename {src => TensorFlow_Image_Recognition/src}/main/resources/generatedModels/CNN_inception5h/imagenet_comp_graph_label_strings.txt (100%) rename {src => TensorFlow_Image_Recognition/src}/main/resources/generatedModels/CNN_inception5h/tensorflow_inception_graph.pb (100%) rename {src => TensorFlow_Image_Recognition/src}/main/resources/generatedModels/TensorFlow_Census/saved_model.pb (100%) rename {src => TensorFlow_Image_Recognition/src}/main/resources/generatedModels/TensorFlow_Census/test.json (100%) mode change 100755 => 100644 rename {src => TensorFlow_Image_Recognition/src}/main/resources/generatedModels/TensorFlow_Census/variables/variables.data-00000-of-00001 (100%) rename {src => TensorFlow_Image_Recognition/src}/main/resources/generatedModels/TensorFlow_Census/variables/variables.index (100%) create mode 100644 TensorFlow_Image_Recognition/src/main/resources/log4j.properties create mode 100644 TensorFlow_Image_Recognition/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java create mode 100644 TensorFlow_Image_Recognition/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java rename {src => TensorFlow_Image_Recognition/src}/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java (100%) create mode 100644 TensorFlow_Keras/pom.xml create mode 100644 
TensorFlow_Keras/readme.md rename {src => TensorFlow_Keras/src}/main/resources/generatedModels/Keras/keras-model-script.py (100%) rename {src => TensorFlow_Keras/src}/main/resources/generatedModels/Keras/simple_mlp.h5 (100%) create mode 100644 TensorFlow_Keras/src/main/resources/log4j.properties create mode 100644 TensorFlow_Keras/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java create mode 100644 TensorFlow_Keras/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java rename {src => TensorFlow_Keras/src}/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java (100%) diff --git a/DL4J_DeepLearning_Iris/pom.xml b/DL4J_DeepLearning_Iris/pom.xml new file mode 100644 index 0000000..85f7099 --- /dev/null +++ b/DL4J_DeepLearning_Iris/pom.xml @@ -0,0 +1,180 @@ + + + 4.0.0 + + com.github.kaiwaehner.kafka.streams.machinelearning + dl4j-deepLearning-iris + CP51_AK21 + + + + confluent + http://packages.confluent.io/maven/ + + + + + 1.8 + 2.1.0 + 2.11 + ${kafka.scala.version}.8 + 5.1.0 + UTF-8 + + + + + + + + org.apache.kafka + kafka-streams + ${kafka.version} + + + + + + + + org.nd4j + nd4j-native-platform + 1.0.0-beta3 + + + + + + org.deeplearning4j + deeplearning4j-core + 1.0.0-beta3 + + + + org.deeplearning4j + deeplearning4j-modelimport + 1.0.0-beta3 + + + + + + org.tensorflow + tensorflow + 1.3.0 + + + + + + ai.h2o + h2o-genmodel + 3.14.0.1 + + + + + + junit + junit + 4.12 + test + + + org.assertj + assertj-core + 3.3.0 + test + + + org.apache.kafka + kafka_${kafka.scala.version} + ${kafka.version} + test + test + + + org.apache.kafka + kafka-clients + ${kafka.version} + test + test + + + org.apache.kafka + kafka-streams + ${kafka.version} + test + test + + + org.apache.curator + curator-test + 2.9.0 + test + + + io.confluent + kafka-schema-registry + ${confluent.version} + test + + + io.confluent + 
kafka-schema-registry + ${confluent.version} + + tests + test + + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.6.1 + + 1.8 + 1.8 + + + + + + org.apache.maven.plugins + maven-assembly-plugin + 2.5.2 + + + jar-with-dependencies + + + + true + com.github.megachucky.kafka.streams.machinelearning.StreamsStarterApp + + + + + + assemble-all + package + + single + + + + + + + diff --git a/DL4J_DeepLearning_Iris/readme.md b/DL4J_DeepLearning_Iris/readme.md new file mode 100644 index 0000000..e684ab6 --- /dev/null +++ b/DL4J_DeepLearning_Iris/readme.md @@ -0,0 +1,168 @@ +# Machine Learning + Kafka Streams Examples + +This project contains **examples which demonstrate how to deploy analytic models to mission-critical, scalable production leveraging [Apache Kafka](https://kafka.apache.org/) and its [Streams API](docs.confluent.io/current/streams/index.html).** +Examples will include analytic models built with TensorFlow, Keras, H2O, Python, DeepLearning4J and other technologies. + +![Kafka Open Source Ecosystem for a Scalable Mission Critical Machine Learning Infrastructure](http://www.kai-waehner.de/blog/wp-content/uploads/2017/10/Apache_Kafka_Ecosystem_Kafka_Streams_Machine_Learning.png "Kafka Open Source Ecosystem for a Scalable Mission Critical Machine Learning Infrastructure") + + +## Material (Blogs Posts, Slides, Videos) +Here is some material about this topic if you want to read and listen to the theory instead of just doing hands-on: +- Blog Post: [How to Build and Deploy Scalable Machine Learning in Production with Apache Kafka](https://www.confluent.io/blog/build-deploy-scalable-machine-learning-production-apache-kafka/) +- Slide Deck: [Apache Kafka + Machine Learning => Intelligent Real Time Applications](https://www.slideshare.net/KaiWaehner/apache-kafka-streams-machine-learning-deep-learning) +- Slide Deck: [Deep Learning at Extreme Scale (in the Cloud) 
with the Apache Kafka Open Source Ecosystem](https://www.slideshare.net/KaiWaehner/deep-learning-at-extreme-scale-in-the-cloud-with-the-apache-kafka-open-source-ecosystem) +- Video Recording: [Deep Learning in Mission Critical and Scalable Real Time Applications with Open Source Frameworks](https://vimeo.com/jaxtv/review/256406763/7fbf4213be) +- Blog Post: [Using Apache Kafka to Drive Cutting-Edge Machine Learning - Hybrid ML Architectures, AutoML, and more...](https://www.confluent.io/blog/using-apache-kafka-drive-cutting-edge-machine-learning) +- Blog Post: [Machine Learning with Python, Jupyter, KSQL and TensorFlow](https://www.confluent.io/blog/machine-learning-with-python-jupyter-ksql-tensorflow) + +## Use Cases and Technologies + +##### The following examples are already available including unit tests: + +* Deployment of a H2O GBM model to a Kafka Streams application for prediction of flight delays +* Deployment of a H2O Deep Learning model to a Kafka Streams application for prediction of flight delays +* Deployment of a pre-built TensorFlow CNN model for image recognition +* Deployment of a DL4J model to predict the species of Iris flowers +* Deployment of a Keras model (trained with TensorFlow backend) using the Import Model API from DeepLearning4J + +**More sophisticated use cases around Kafka Streams and other technologies will be added over time in this or related Github project. Some ideas**: + +* Image Recognition with H2O and TensorFlow (to show the difference of using H2O instead of using just low level TensorFlow APIs) +* Anomaly Detection with Autoencoders leveraging DeepLearning4J. +* Cross Selling and Customer Churn Detection using classical Machine Learning algorithms but also Deep Learning +* Stateful Stream Processing to combine different model execution steps into a more powerful workflow instead of "just" inferencing single events (a good example might be a streaming process with sliding or session windows). 
+* Keras to build different models with Python, TensorFlow, Theano and other Deep Learning frameworks under the hood + Kafka Streams as generic Machine Learning infrastructure to deploy, execute and monitor these different models. + +##### Some other Github projects exist already with more ML + Kafka content: + +- Deep Learning UDF for KSQL: [Streaming Anomaly Detection of MQTT IoT Sensor Data using an Autoencoder](https://github.com/kaiwaehner/ksql-udf-deep-learning-mqtt-iot) +- End-to-End ML Integration Demo: [Continuous Health Checks with Anomaly Detection using KSQL, Kafka Connect, Deep Learning and Elasticsearch](https://github.com/kaiwaehner/ksql-fork-with-deep-learning-function) +- TensorFlow Serving + gRPC + Kafka Streams on Github => Stream Processing and RPC / Request-Response concepts combined: [Model inference with Apache Kafka, Kafka Streams and a TensorFlow model deployed on a TensorFlow Serving model server](https://github.com/kaiwaehner/tensorflow-serving-java-grpc-kafka-streams) + +## Requirements, Installation and Usage +The code is developed and tested on Mac and Linux operating systems. As Kafka does not support Windows well, the code is not tested on Windows at all. + +Java 8 and Maven 3 are required. Maven will download all required dependencies. + +Just download the project and run + + mvn clean package + +Apache Kafka 2.1 is currently used. The code is also compatible with Kafka and Kafka Streams 1.1 and 2.0. + +**Please make sure to run the Maven build without any changes first.** If it works without errors, you can change library versions, Java version, etc. and see if it still works or if you need to adjust code. + +Every example includes an implementation and a unit test. The examples are very simple and lightweight. No further configuration is needed to build and run them. Though, for this reason, the generated models are also included (and increase the download size of the project).
+ +The unit tests use some Kafka helper classes like EmbeddedSingleNodeKafkaCluster in package **com.github.megachucky.kafka.streams.machinelearning.test.utils** so that you can run them without any other configuration or Kafka setup. +If you want to run an implementation of a main class in package **com.github.megachucky.kafka.streams.machinelearning**, you need to start a Kafka cluster (with at least one Zookeeper and one Kafka broker running) and also create the required topics. So check out the unit tests first. + + +### Example 1 - Gradient Boosting with H2O.ai for Prediction of Flight Delays + +**Use Case** + +Gradient Boosting Method (GBM) to predict flight delays. +A H2O generated GBM Java model (POJO) is instantiated and used in a Kafka Streams application to do inference on new events. + +**Machine Learning Technology** + +* [H2O](https://www.h2o.ai) +* Check the [H2O demo](https://github.com/h2oai/h2o-2/wiki/Hacking-Airline-DataSet-with-H2O) to understand the test and how the model was built +* You can re-use the generated Java model attached to this project ([gbm_pojo_test.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/gbm_pojo_test.java)) or build your own model using R, Python, Flow UI or any other technologies supported by the H2O framework.
+ +**Source Code** + +[MachineLearning_H2O_Example.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java) + +**Unit Test** + +[MachineLearning_H2O_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java) + +**Manual Testing** + +You can easily test this by yourself. Here are the steps: +- Start Kafka, e.g. with Confluent CLI: + + confluent start kafka +- Create topics AirlineInputTopic and AirlineOutputTopic + + kafka-topics --zookeeper localhost:2181 --create --topic AirlineInputTopic --partitions 3 --replication-factor 1 + + kafka-topics --zookeeper localhost:2181 --create --topic AirlineOutputTopic --partitions 3 --replication-factor 1 +- Start the Kafka Streams app: + + java -cp target/kafka-streams-machine-learning-examples-1.0-SNAPSHOT-jar-with-dependencies.jar com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_MachineLearning_H2O_GBM_Example +- Send messages, e.g. with kafkacat: + + echo -e "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES" | kafkacat -b localhost:9092 -P -t AirlineInputTopic +- Consume predictions: + + kafka-console-consumer --bootstrap-server localhost:9092 --topic AirlineOutputTopic --from-beginning +- Find more details in the unit test... 
+ + +**H2O Deep Learning instead of H2O GBM Model** + +The project includes another example with similar code to use a [H2O Deep Learning model](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java) instead of H2O GBM Model: [Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java) +This shows how you can easily test or replace different analytic models for one use case, or even use them for A/B testing. + +### Example 2 - Convolutional Neural Network (CNN) with TensorFlow for Image Recognition +**Use Case** + +Convolutional Neural Network (CNN) for image recognition. +A prebuilt TensorFlow CNN model is instantiated and used in a Kafka Streams application to recognize new JPEG images. A Kafka Input Topic receives the location of a new image (another option would be to send the image in the Kafka message instead of just a link to it); the application infers the content of the picture via the TensorFlow model and sends the result to a Kafka Output Topic. + +**Machine Learning Technology** +* [TensorFlow](https://www.tensorflow.org/) +* Leverages [TensorFlow for Java](https://www.tensorflow.org/install/install_java). These APIs are particularly well-suited for loading models created in Python and executing them within a Java application. Please note: The Java API doesn't yet include convenience functions (which you might know from [Keras](https://keras.io/)), thus a private helper class is used in the example for construction and execution of the pre-built TensorFlow model. 
+* Check the official TensorFlow demo [LabelImage](https://github.com/kaiwaehner/tensorflow/blob/r1.3/tensorflow/java/src/main/java/org/tensorflow/examples/LabelImage.java) to understand this image recognition example +* You can re-use the pre-trained TensorFlow model attached to this project [tensorflow_inception_graph.pb](http://arxiv.org/abs/1512.00567) or add your own model. +* The 'images' folder contains models which were used for training the model (trained_airplane_1.jpg, trained_airplane_2.jpg, trained_butterfly.jpg) but also a new picture (new_airplane.jpg) which is not known by the model and using a different resolution than the others. Feel free to add your own pictures (they need to be trained, see list of trained pictures in the file: imagenet_comp_graph_label_strings.txt), otherwise the model will return 'unknown'. + +**Source Code** + +[Kafka_Streams_TensorFlow_Image_Recognition_Example.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_Example.java) + +**Unit Test** + +[Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java) + +### Example 3 - Iris Prediction using a Neural Network with DeepLearning4J (DL4J) +**Use Case** + +Iris Species Prediction using a Neural Network. +This is a famous example: Prediction of the Iris Species - implemented with many different ML algorithms. Here I use DeepLearning4J (DL4J) to build a neural network using Iris Dataset. + +**Machine Learning Technology** +* [DeepLearning4J](https://deeplearning4j.org) +* Pretty simple example to demo how to build, save and load neural networks with DL4J. 
[MultiLayerNetwork](https://deeplearning4j.org/doc/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.html) and [INDArray](http://nd4j.org/doc/org/nd4j/linalg/api/ndarray/INDArray.html) are the key APIs to look at if you want to understand the details. +* The model is created via [DeepLearning4J_CSV_Model.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Model.java) and stored in the resources: [DL4J_Iris_Model.zip](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/tree/master/src/main/resources/generatedModels/DL4J). No need to re-train, just for reference. Kudos to Adam Gibson who created this example as part of the DL4J project. + +**Unit Test** +[Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java) + +### Example 4 - Python + Keras + TensorFlow + DeepLearning4j + +**Use Case** + +Development of an analytic model trained with Python, Keras and TensorFlow and deployment to Java and Kafka ecosystem. No business case, just a technical demonstration of a simple 'Hello World' Keras model. Feel free to replace the model with any other Keras model trained with your backend of choice. You just need to replace the model binary (and use a model which is compatible with DeepLearning4J 's model importer). + +**Machine Learning Technology** +* [Python](https://www.python.org/) +* [DeepLearning4J](https://deeplearning4j.org) +* [Keras](https://keras.io/) - a high-level neural networks API, written in Python and capable of running on top of TensorFlow, CNTK, or Theano. 
+* [TensorFlow](https://www.tensorflow.org/) - used as backend under the hood of Keras +* DeepLearning4J 's [KerasModelImport feature](https://deeplearning4j.org/docs/latest/keras-import-overview) is used for importing the Keras / TensorFlow model into Java. The used model is its 'Hello World' model example. +* The Keras model was trained with this [Python script](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/resources/generatedModels/Keras/keras-model-script.py). + +**Unit Test** + +[Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java) + + + + + + + diff --git a/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Iris_Model.java b/DL4J_DeepLearning_Iris/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Iris_Model.java similarity index 100% rename from src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Iris_Model.java rename to DL4J_DeepLearning_Iris/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Iris_Model.java diff --git a/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Model_Inference.java b/DL4J_DeepLearning_Iris/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Model_Inference.java similarity index 100% rename from src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Model_Inference.java rename to DL4J_DeepLearning_Iris/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Model_Inference.java diff --git a/src/main/resources/DL4J_Resources/iris.txt 
b/DL4J_DeepLearning_Iris/src/main/resources/DL4J_Resources/iris.txt similarity index 100% rename from src/main/resources/DL4J_Resources/iris.txt rename to DL4J_DeepLearning_Iris/src/main/resources/DL4J_Resources/iris.txt diff --git a/src/main/resources/generatedModels/DL4J/DL4J_Iris_Model.zip b/DL4J_DeepLearning_Iris/src/main/resources/generatedModels/DL4J/DL4J_Iris_Model.zip similarity index 100% rename from src/main/resources/generatedModels/DL4J/DL4J_Iris_Model.zip rename to DL4J_DeepLearning_Iris/src/main/resources/generatedModels/DL4J/DL4J_Iris_Model.zip diff --git a/DL4J_DeepLearning_Iris/src/main/resources/log4j.properties b/DL4J_DeepLearning_Iris/src/main/resources/log4j.properties new file mode 100644 index 0000000..d511cbd --- /dev/null +++ b/DL4J_DeepLearning_Iris/src/main/resources/log4j.properties @@ -0,0 +1,5 @@ +log4j.rootLogger=INFO, stdout + +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%p %m (%c:%L) %n \ No newline at end of file diff --git a/DL4J_DeepLearning_Iris/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java b/DL4J_DeepLearning_Iris/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java new file mode 100644 index 0000000..d3c7b18 --- /dev/null +++ b/DL4J_DeepLearning_Iris/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java @@ -0,0 +1,45 @@ +package com.github.jukkakarvanen.kafka.streams.integration.utils; + +import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Properties; + +/** This is helper class to workaround for Failing stream tests in Windows environment KAFKA-6647. 
+ * + * @author Jukka Karvanen + * + * The causing issue is https://issues.apache.org/jira/browse/KAFKA-6647 + * Replacing EmbeddedKafkaCluster with TestEmbeddedKafkaCluster will catch and ignore the exception + * happening during the tear down of the test + * The exception does not have affect to functionality + */ + +public class TestEmbeddedKafkaCluster extends EmbeddedKafkaCluster { + private static final Logger log = LoggerFactory.getLogger(TestEmbeddedKafkaCluster.class); + + public TestEmbeddedKafkaCluster(int numBrokers) { + super(numBrokers); + } + + public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig) { + super(numBrokers, brokerConfig); + } + + public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig, long mockTimeMillisStart) { + super(numBrokers, brokerConfig, mockTimeMillisStart); + } + + public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig, long mockTimeMillisStart, long mockTimeNanoStart) { + super(numBrokers, brokerConfig, mockTimeMillisStart, mockTimeNanoStart); + } + + public void after() { + try { + super.after(); + } catch (RuntimeException e) { + log.warn("Ignoring exception, test failing in Windows due this exception {}", e); + } + } +} diff --git a/DL4J_DeepLearning_Iris/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java b/DL4J_DeepLearning_Iris/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java new file mode 100644 index 0000000..ef2bc4e --- /dev/null +++ b/DL4J_DeepLearning_Iris/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java @@ -0,0 +1,47 @@ +package com.github.jukkakarvanen.kafka.streams.integration.utils; + +import org.apache.kafka.common.utils.Time; +import org.apache.kafka.streams.KafkaClientSupplier; +import org.apache.kafka.streams.KafkaStreams; +import org.apache.kafka.streams.Topology; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + 
+import java.util.Properties; + +/** This is helper class to workaround for Failing stream tests in Windows environment KAFKA-6647. + * + * @author Jukka Karvanen + * + * The causing issue is https://issues.apache.org/jira/browse/KAFKA-6647 + * Replacing KafkaStreams with TestKafkaStreams will catch and ignore the exception caused by cleanUp + * The exception does not have affect to functionality + */ + +public class TestKafkaStreams extends KafkaStreams { + private static final Logger log = LoggerFactory.getLogger(TestKafkaStreams.class); + + public TestKafkaStreams(Topology topology, Properties props) { + super(topology, props); + } + + public TestKafkaStreams(Topology topology, Properties props, KafkaClientSupplier clientSupplier) { + super(topology, props, clientSupplier); + } + + public TestKafkaStreams(Topology topology, Properties props, Time time) { + super(topology, props, time); + } + + public TestKafkaStreams(Topology topology, Properties props, KafkaClientSupplier clientSupplier, Time time) { + super(topology, props, clientSupplier, time); + } + + public void cleanUp() { + try { + super.cleanUp(); + } catch (RuntimeException e) { + log.warn("Ignoring exception, test failing in Windows due this exception {}", e); + } + } +} diff --git a/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java b/DL4J_DeepLearning_Iris/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java similarity index 100% rename from src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java rename to DL4J_DeepLearning_Iris/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java diff --git a/H2O_DeepLearning/pom.xml 
b/H2O_DeepLearning/pom.xml new file mode 100644 index 0000000..35a96f9 --- /dev/null +++ b/H2O_DeepLearning/pom.xml @@ -0,0 +1,180 @@ + + + 4.0.0 + + com.github.kaiwaehner.kafka.streams.machinelearning + h20-deeplearning + CP51_AK21 + + + + confluent + http://packages.confluent.io/maven/ + + + + + 1.8 + 2.1.0 + 2.11 + ${kafka.scala.version}.8 + 5.1.0 + UTF-8 + + + + + + + + org.apache.kafka + kafka-streams + ${kafka.version} + + + + + + + + org.nd4j + nd4j-native-platform + 1.0.0-beta3 + + + + + + org.deeplearning4j + deeplearning4j-core + 1.0.0-beta3 + + + + org.deeplearning4j + deeplearning4j-modelimport + 1.0.0-beta3 + + + + + + org.tensorflow + tensorflow + 1.3.0 + + + + + + ai.h2o + h2o-genmodel + 3.14.0.1 + + + + + + junit + junit + 4.12 + test + + + org.assertj + assertj-core + 3.3.0 + test + + + org.apache.kafka + kafka_${kafka.scala.version} + ${kafka.version} + test + test + + + org.apache.kafka + kafka-clients + ${kafka.version} + test + test + + + org.apache.kafka + kafka-streams + ${kafka.version} + test + test + + + org.apache.curator + curator-test + 2.9.0 + test + + + io.confluent + kafka-schema-registry + ${confluent.version} + test + + + io.confluent + kafka-schema-registry + ${confluent.version} + + tests + test + + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.6.1 + + 1.8 + 1.8 + + + + + + org.apache.maven.plugins + maven-assembly-plugin + 2.5.2 + + + jar-with-dependencies + + + + true + com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_MachineLearning_H2O_DeepLearning_Example + + + + + + assemble-all + package + + single + + + + + + + diff --git a/H2O_DeepLearning/readme.md b/H2O_DeepLearning/readme.md new file mode 100644 index 0000000..e684ab6 --- /dev/null +++ b/H2O_DeepLearning/readme.md @@ -0,0 +1,168 @@ +# Machine Learning + Kafka Streams Examples + +This project contains **examples which demonstrate how to deploy analytic models to mission-critical, scalable production leveraging [Apache 
Kafka](https://kafka.apache.org/) and its [Streams API](https://docs.confluent.io/current/streams/index.html).** +Examples will include analytic models built with TensorFlow, Keras, H2O, Python, DeepLearning4J and other technologies. + +![Kafka Open Source Ecosystem for a Scalable Mission Critical Machine Learning Infrastructure](http://www.kai-waehner.de/blog/wp-content/uploads/2017/10/Apache_Kafka_Ecosystem_Kafka_Streams_Machine_Learning.png "Kafka Open Source Ecosystem for a Scalable Mission Critical Machine Learning Infrastructure") + + +## Material (Blog Posts, Slides, Videos) +Here is some material about this topic if you want to read and listen to the theory instead of just doing hands-on: +- Blog Post: [How to Build and Deploy Scalable Machine Learning in Production with Apache Kafka](https://www.confluent.io/blog/build-deploy-scalable-machine-learning-production-apache-kafka/) +- Slide Deck: [Apache Kafka + Machine Learning => Intelligent Real Time Applications](https://www.slideshare.net/KaiWaehner/apache-kafka-streams-machine-learning-deep-learning) +- Slide Deck: [Deep Learning at Extreme Scale (in the Cloud) 
with the Apache Kafka Open Source Ecosystem](https://www.slideshare.net/KaiWaehner/deep-learning-at-extreme-scale-in-the-cloud-with-the-apache-kafka-open-source-ecosystem) +- Video Recording: [Deep Learning in Mission Critical and Scalable Real Time Applications with Open Source Frameworks](https://vimeo.com/jaxtv/review/256406763/7fbf4213be) +- Blog Post: [Using Apache Kafka to Drive Cutting-Edge Machine Learning - Hybrid ML Architectures, AutoML, and more...](https://www.confluent.io/blog/using-apache-kafka-drive-cutting-edge-machine-learning) +- Blog Post: [Machine Learning with Python, Jupyter, KSQL and TensorFlow](https://www.confluent.io/blog/machine-learning-with-python-jupyter-ksql-tensorflow) + +## Use Cases and Technologies + +##### The following examples are already available including unit tests: + +* Deployment of a H2O GBM model to a Kafka Streams application for prediction of flight delays +* Deployment of a H2O Deep Learning model to a Kafka Streams application for prediction of flight delays +* Deployment of a pre-built TensorFlow CNN model for image recognition +* Deployment of a DL4J model to predict the species of Iris flowers +* Deployment of a Keras model (trained with TensorFlow backend) using the Import Model API from DeepLearning4J + +**More sophisticated use cases around Kafka Streams and other technologies will be added over time in this or related Github project. Some ideas**: + +* Image Recognition with H2O and TensorFlow (to show the difference of using H2O instead of using just low level TensorFlow APIs) +* Anomaly Detection with Autoencoders leveraging DeepLearning4J. +* Cross Selling and Customer Churn Detection using classical Machine Learning algorithms but also Deep Learning +* Stateful Stream Processing to combine different model execution steps into a more powerful workflow instead of "just" inferencing single events (a good example might be a streaming process with sliding or session windows). 
+* Keras to build different models with Python, TensorFlow, Theano and other Deep Learning frameworks under the hood + Kafka Streams as generic Machine Learning infrastructure to deploy, execute and monitor these different models. + +##### Some other Github projects exist already with more ML + Kafka content: + +- Deep Learning UDF for KSQL: [Streaming Anomaly Detection of MQTT IoT Sensor Data using an Autoencoder](https://github.com/kaiwaehner/ksql-udf-deep-learning-mqtt-iot) +- End-to-End ML Integration Demo: [Continuous Health Checks with Anomaly Detection using KSQL, Kafka Connect, Deep Learning and Elasticsearch](https://github.com/kaiwaehner/ksql-fork-with-deep-learning-function) +- TensorFlow Serving + gRPC + Kafka Streams on Github => Stream Processing and RPC / Request-Response concepts combined: [Model inference with Apache Kafka, Kafka Streams and a TensorFlow model deployed on a TensorFlow Serving model server](https://github.com/kaiwaehner/tensorflow-serving-java-grpc-kafka-streams) + +## Requirements, Installation and Usage +The code is developed and tested on Mac and Linux operating systems. As Kafka does not officially support Windows and does not work well on it, the code is not tested on Windows at all. + +Java 8 and Maven 3 are required. Maven will download all required dependencies. + +Just download the project and run + + mvn clean package + +Apache Kafka 2.1 is currently used. The code is also compatible with Kafka and Kafka Streams 1.1 and 2.0. + +**Please make sure to run the Maven build without any changes first.** If it works without errors, you can change library versions, Java version, etc. and see if it still works or if you need to adjust code. + +Every example includes an implementation and a unit test. The examples are very simple and lightweight. No further configuration is needed to build and run them. Though, for this reason, the generated models are also included (and increase the download size of the project). 
+ +The unit tests use some Kafka helper classes like EmbeddedSingleNodeKafkaCluster in package **com.github.megachucky.kafka.streams.machinelearning.test.utils** so that you can run it without any other configuration or Kafka setup. +If you want to run an implementation of a main class in package **com.github.megachucky.kafka.streams.machinelearning**, you need to start a Kafka cluster (with at least one Zookeeper and one Kafka broker running) and also create the required topics. So check out the unit tests first. + + +### Example 1 - Gradient Boosting with H2O.ai for Prediction of Flight Delays + +**Use Case** + +Gradient Boosting Method (GBM) to predict flight delays. +An H2O-generated GBM Java model (POJO) is instantiated and used in a Kafka Streams application to do inference on new events. + +**Machine Learning Technology** + +* [H2O](https://www.h2o.ai) +* Check the [H2O demo](https://github.com/h2oai/h2o-2/wiki/Hacking-Airline-DataSet-with-H2O) to understand the test and how the model was built +* You can re-use the generated Java model attached to this project ([gbm_pojo_test.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/gbm_pojo_test.java)) or build your own model using R, Python, Flow UI or any other technologies supported by H2O framework. 
+ +**Source Code** + +[MachineLearning_H2O_Example.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java) + +**Unit Test** + +[MachineLearning_H2O_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java) + +**Manual Testing** + +You can easily test this by yourself. Here are the steps: +- Start Kafka, e.g. with Confluent CLI: + + confluent start kafka +- Create topics AirlineInputTopic and AirlineOutputTopic + + kafka-topics --zookeeper localhost:2181 --create --topic AirlineInputTopic --partitions 3 --replication-factor 1 + + kafka-topics --zookeeper localhost:2181 --create --topic AirlineOutputTopic --partitions 3 --replication-factor 1 +- Start the Kafka Streams app: + + java -cp target/kafka-streams-machine-learning-examples-1.0-SNAPSHOT-jar-with-dependencies.jar com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_MachineLearning_H2O_GBM_Example +- Send messages, e.g. with kafkacat: + + echo -e "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES" | kafkacat -b localhost:9092 -P -t AirlineInputTopic +- Consume predictions: + + kafka-console-consumer --bootstrap-server localhost:9092 --topic AirlineOutputTopic --from-beginning +- Find more details in the unit test... 
+ + +**H2O Deep Learning instead of H2O GBM Model** + +The project includes another example with similar code to use a [H2O Deep Learning model](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java) instead of H2O GBM Model: [Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java) +This shows how you can easily test or replace different analytic models for one use case, or even use them for A/B testing. + +### Example 2 - Convolutional Neural Network (CNN) with TensorFlow for Image Recognition +**Use Case** + +Convolutional Neural Network (CNN) for image recognition. +A prebuilt TensorFlow CNN model is instantiated and used in a Kafka Streams application to recognize new JPEG images. A Kafka Input Topic receives the location of a new image (another option would be to send the image in the Kafka message instead of just a link to it); the application infers the content of the picture via the TensorFlow model and sends the result to a Kafka Output Topic. + +**Machine Learning Technology** +* [TensorFlow](https://www.tensorflow.org/) +* Leverages [TensorFlow for Java](https://www.tensorflow.org/install/install_java). These APIs are particularly well-suited for loading models created in Python and executing them within a Java application. Please note: The Java API doesn't yet include convenience functions (which you might know from [Keras](https://keras.io/)), thus a private helper class is used in the example for construction and execution of the pre-built TensorFlow model. 
+* Check the official TensorFlow demo [LabelImage](https://github.com/kaiwaehner/tensorflow/blob/r1.3/tensorflow/java/src/main/java/org/tensorflow/examples/LabelImage.java) to understand this image recognition example +* You can re-use the pre-trained TensorFlow model attached to this project [tensorflow_inception_graph.pb](http://arxiv.org/abs/1512.00567) or add your own model. +* The 'images' folder contains models which were used for training the model (trained_airplane_1.jpg, trained_airplane_2.jpg, trained_butterfly.jpg) but also a new picture (new_airplane.jpg) which is not known by the model and using a different resolution than the others. Feel free to add your own pictures (they need to be trained, see list of trained pictures in the file: imagenet_comp_graph_label_strings.txt), otherwise the model will return 'unknown'. + +**Source Code** + +[Kafka_Streams_TensorFlow_Image_Recognition_Example.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_Example.java) + +**Unit Test** + +[Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java) + +### Example 3 - Iris Prediction using a Neural Network with DeepLearning4J (DL4J) +**Use Case** + +Iris Species Prediction using a Neural Network. +This is a famous example: Prediction of the Iris Species - implemented with many different ML algorithms. Here I use DeepLearning4J (DL4J) to build a neural network using Iris Dataset. + +**Machine Learning Technology** +* [DeepLearning4J](https://deeplearning4j.org) +* Pretty simple example to demo how to build, save and load neural networks with DL4J. 
[MultiLayerNetwork](https://deeplearning4j.org/doc/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.html) and [INDArray](http://nd4j.org/doc/org/nd4j/linalg/api/ndarray/INDArray.html) are the key APIs to look at if you want to understand the details. +* The model is created via [DeepLearning4J_CSV_Model.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Model.java) and stored in the resources: [DL4J_Iris_Model.zip](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/tree/master/src/main/resources/generatedModels/DL4J). No need to re-train, just for reference. Kudos to Adam Gibson who created this example as part of the DL4J project. + +**Unit Test** +[Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java) + +### Example 4 - Python + Keras + TensorFlow + DeepLearning4j + +**Use Case** + +Development of an analytic model trained with Python, Keras and TensorFlow and deployment to Java and Kafka ecosystem. No business case, just a technical demonstration of a simple 'Hello World' Keras model. Feel free to replace the model with any other Keras model trained with your backend of choice. You just need to replace the model binary (and use a model which is compatible with DeepLearning4J 's model importer). + +**Machine Learning Technology** +* [Python](https://www.python.org/) +* [DeepLearning4J](https://deeplearning4j.org) +* [Keras](https://keras.io/) - a high-level neural networks API, written in Python and capable of running on top of TensorFlow, CNTK, or Theano. 
+* [TensorFlow](https://www.tensorflow.org/) - used as backend under the hood of Keras +* DeepLearning4J 's [KerasModelImport feature](https://deeplearning4j.org/docs/latest/keras-import-overview) is used for importing the Keras / TensorFlow model into Java. The used model is its 'Hello World' model example. +* The Keras model was trained with this [Python script](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/resources/generatedModels/Keras/keras-model-script.py). + +**Unit Test** + +[Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java) + + + + + + + diff --git a/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java b/H2O_DeepLearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java similarity index 100% rename from src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java rename to H2O_DeepLearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java diff --git a/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java b/H2O_DeepLearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java similarity index 100% rename from src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java rename to H2O_DeepLearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java diff --git 
a/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example_Refactored.java b/H2O_DeepLearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example_Refactored.java similarity index 100% rename from src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example_Refactored.java rename to H2O_DeepLearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example_Refactored.java diff --git a/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java b/H2O_DeepLearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java similarity index 100% rename from src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java rename to H2O_DeepLearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java diff --git a/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/gbm_pojo_test.java b/H2O_DeepLearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/gbm_pojo_test.java similarity index 100% rename from src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/gbm_pojo_test.java rename to H2O_DeepLearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/gbm_pojo_test.java diff --git a/src/main/resources/generatedModels/DeepWater_model_python_1503570558230_1.zip b/H2O_DeepLearning/src/main/resources/generatedModels/DeepWater_model_python_1503570558230_1.zip similarity index 100% rename from src/main/resources/generatedModels/DeepWater_model_python_1503570558230_1.zip rename to 
H2O_DeepLearning/src/main/resources/generatedModels/DeepWater_model_python_1503570558230_1.zip diff --git a/src/main/resources/generatedModels/GBM_model_python_1503397740678_1.zip b/H2O_DeepLearning/src/main/resources/generatedModels/GBM_model_python_1503397740678_1.zip similarity index 100% rename from src/main/resources/generatedModels/GBM_model_python_1503397740678_1.zip rename to H2O_DeepLearning/src/main/resources/generatedModels/GBM_model_python_1503397740678_1.zip diff --git a/H2O_DeepLearning/src/main/resources/log4j.properties b/H2O_DeepLearning/src/main/resources/log4j.properties new file mode 100644 index 0000000..d511cbd --- /dev/null +++ b/H2O_DeepLearning/src/main/resources/log4j.properties @@ -0,0 +1,5 @@ +log4j.rootLogger=INFO, stdout + +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%p %m (%c:%L) %n \ No newline at end of file diff --git a/H2O_DeepLearning/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java b/H2O_DeepLearning/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java new file mode 100644 index 0000000..d3c7b18 --- /dev/null +++ b/H2O_DeepLearning/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java @@ -0,0 +1,45 @@ +package com.github.jukkakarvanen.kafka.streams.integration.utils; + +import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Properties; + +/** This is helper class to workaround for Failing stream tests in Windows environment KAFKA-6647. 
+ * + * @author Jukka Karvanen + * + * The causing issue is https://issues.apache.org/jira/browse/KAFKA-6647 + * Replacing EmbeddedKafkaCluster with TestEmbeddedKafkaCluster will catch and ignore the exception + * happening during the tear down of the test + * The exception does not have affect to functionality + */ + +public class TestEmbeddedKafkaCluster extends EmbeddedKafkaCluster { + private static final Logger log = LoggerFactory.getLogger(TestEmbeddedKafkaCluster.class); + + public TestEmbeddedKafkaCluster(int numBrokers) { + super(numBrokers); + } + + public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig) { + super(numBrokers, brokerConfig); + } + + public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig, long mockTimeMillisStart) { + super(numBrokers, brokerConfig, mockTimeMillisStart); + } + + public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig, long mockTimeMillisStart, long mockTimeNanoStart) { + super(numBrokers, brokerConfig, mockTimeMillisStart, mockTimeNanoStart); + } + + public void after() { + try { + super.after(); + } catch (RuntimeException e) { + log.warn("Ignoring exception, test failing in Windows due this exception {}", e); + } + } +} diff --git a/H2O_DeepLearning/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java b/H2O_DeepLearning/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java new file mode 100644 index 0000000..ef2bc4e --- /dev/null +++ b/H2O_DeepLearning/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java @@ -0,0 +1,47 @@ +package com.github.jukkakarvanen.kafka.streams.integration.utils; + +import org.apache.kafka.common.utils.Time; +import org.apache.kafka.streams.KafkaClientSupplier; +import org.apache.kafka.streams.KafkaStreams; +import org.apache.kafka.streams.Topology; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import 
java.util.Properties; + +/** This is helper class to workaround for Failing stream tests in Windows environment KAFKA-6647. + * + * @author Jukka Karvanen + * + * The causing issue is https://issues.apache.org/jira/browse/KAFKA-6647 + * Replacing KafkaStreams with TestKafkaStreams will catch and ignore the exception caused by cleanUp + * The exception does not have affect to functionality + */ + +public class TestKafkaStreams extends KafkaStreams { + private static final Logger log = LoggerFactory.getLogger(TestKafkaStreams.class); + + public TestKafkaStreams(Topology topology, Properties props) { + super(topology, props); + } + + public TestKafkaStreams(Topology topology, Properties props, KafkaClientSupplier clientSupplier) { + super(topology, props, clientSupplier); + } + + public TestKafkaStreams(Topology topology, Properties props, Time time) { + super(topology, props, time); + } + + public TestKafkaStreams(Topology topology, Properties props, KafkaClientSupplier clientSupplier, Time time) { + super(topology, props, clientSupplier, time); + } + + public void cleanUp() { + try { + super.cleanUp(); + } catch (RuntimeException e) { + log.warn("Ignoring exception, test failing in Windows due this exception {}", e); + } + } +} diff --git a/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java b/H2O_DeepLearning/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java similarity index 100% rename from src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java rename to H2O_DeepLearning/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java diff --git 
a/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java b/H2O_DeepLearning/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java similarity index 100% rename from src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java rename to H2O_DeepLearning/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java diff --git a/TensorFlow_Image_Recognition/pom.xml b/TensorFlow_Image_Recognition/pom.xml new file mode 100644 index 0000000..b2c6659 --- /dev/null +++ b/TensorFlow_Image_Recognition/pom.xml @@ -0,0 +1,181 @@ + + + 4.0.0 + + com.github.kaiwaehner.kafka.streams.machinelearning + tensorflow-image-recognition + CP51_AK21 + + + + confluent + http://packages.confluent.io/maven/ + + + + + 1.8 + 2.1.0 + 2.11 + ${kafka.scala.version}.8 + 5.1.0 + UTF-8 + + + + + + + + org.apache.kafka + kafka-streams + ${kafka.version} + + + + + + + + org.nd4j + nd4j-native-platform + 1.0.0-beta3 + + + + + + org.deeplearning4j + deeplearning4j-core + 1.0.0-beta3 + + + + org.deeplearning4j + deeplearning4j-modelimport + 1.0.0-beta3 + + + + + + org.tensorflow + tensorflow + 1.3.0 + + + + + + ai.h2o + h2o-genmodel + 3.14.0.1 + + + + + + junit + junit + 4.12 + test + + + org.assertj + assertj-core + 3.3.0 + test + + + org.apache.kafka + kafka_${kafka.scala.version} + ${kafka.version} + test + test + + + org.apache.kafka + kafka-clients + ${kafka.version} + test + test + + + org.apache.kafka + kafka-streams + ${kafka.version} + test + test + + + org.apache.curator + curator-test + 2.9.0 + test + + + io.confluent + kafka-schema-registry + ${confluent.version} + test + + + io.confluent + kafka-schema-registry + ${confluent.version} + + tests + test + + + + + + + + 
org.apache.maven.plugins + maven-compiler-plugin + 3.6.1 + + 1.8 + 1.8 + + + + + + org.apache.maven.plugins + maven-assembly-plugin + 2.5.2 + + + jar-with-dependencies + + + + true + com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_TensorFlow_Image_Recognition_Example + + + + + + + assemble-all + package + + single + + + + + + + diff --git a/TensorFlow_Image_Recognition/readme.md b/TensorFlow_Image_Recognition/readme.md new file mode 100644 index 0000000..e684ab6 --- /dev/null +++ b/TensorFlow_Image_Recognition/readme.md @@ -0,0 +1,168 @@ +# Machine Learning + Kafka Streams Examples + +This project contains **examples which demonstrate how to deploy analytic models to mission-critical, scalable production leveraging [Apache Kafka](https://kafka.apache.org/) and its [Streams API](https://docs.confluent.io/current/streams/index.html).** +Examples will include analytic models built with TensorFlow, Keras, H2O, Python, DeepLearning4J and other technologies. + +![Kafka Open Source Ecosystem for a Scalable Mission Critical Machine Learning Infrastructure](http://www.kai-waehner.de/blog/wp-content/uploads/2017/10/Apache_Kafka_Ecosystem_Kafka_Streams_Machine_Learning.png "Kafka Open Source Ecosystem for a Scalable Mission Critical Machine Learning Infrastructure") + + +## Material (Blog Posts, Slides, Videos) +Here is some material about this topic if you want to read and listen to the theory instead of just doing hands-on: +- Blog Post: [How to Build and Deploy Scalable Machine Learning in Production with Apache Kafka](https://www.confluent.io/blog/build-deploy-scalable-machine-learning-production-apache-kafka/) +- Slide Deck: [Apache Kafka + Machine Learning => Intelligent Real Time Applications](https://www.slideshare.net/KaiWaehner/apache-kafka-streams-machine-learning-deep-learning) +- Slide Deck: [Deep Learning at Extreme Scale (in the Cloud) 
with the Apache Kafka Open Source Ecosystem](https://www.slideshare.net/KaiWaehner/deep-learning-at-extreme-scale-in-the-cloud-with-the-apache-kafka-open-source-ecosystem) +- Video Recording: [Deep Learning in Mission Critical and Scalable Real Time Applications with Open Source Frameworks](https://vimeo.com/jaxtv/review/256406763/7fbf4213be) +- Blog Post: [Using Apache Kafka to Drive Cutting-Edge Machine Learning - Hybrid ML Architectures, AutoML, and more...](https://www.confluent.io/blog/using-apache-kafka-drive-cutting-edge-machine-learning) +- Blog Post: [Machine Learning with Python, Jupyter, KSQL and TensorFlow](https://www.confluent.io/blog/machine-learning-with-python-jupyter-ksql-tensorflow) + +## Use Cases and Technologies + +##### The following examples are already available including unit tests: + +* Deployment of a H2O GBM model to a Kafka Streams application for prediction of flight delays +* Deployment of a H2O Deep Learning model to a Kafka Streams application for prediction of flight delays +* Deployment of a pre-built TensorFlow CNN model for image recognition +* Deployment of a DL4J model to predict the species of Iris flowers +* Deployment of a Keras model (trained with TensorFlow backend) using the Import Model API from DeepLearning4J + +**More sophisticated use cases around Kafka Streams and other technologies will be added over time in this or related Github project. Some ideas**: + +* Image Recognition with H2O and TensorFlow (to show the difference of using H2O instead of using just low level TensorFlow APIs) +* Anomaly Detection with Autoencoders leveraging DeepLearning4J. +* Cross Selling and Customer Churn Detection using classical Machine Learning algorithms but also Deep Learning +* Stateful Stream Processing to combine different model execution steps into a more powerful workflow instead of "just" inferencing single events (a good example might be a streaming process with sliding or session windows). 
+* Keras to build different models with Python, TensorFlow, Theano and other Deep Learning frameworks under the hood + Kafka Streams as generic Machine Learning infrastructure to deploy, execute and monitor these different models. + +##### Some other Github projects exist already with more ML + Kafka content: + +- Deep Learning UDF for KSQL: [Streaming Anomaly Detection of MQTT IoT Sensor Data using an Autoencoder](https://github.com/kaiwaehner/ksql-udf-deep-learning-mqtt-iot) +- End-to-End ML Integration Demo: [Continuous Health Checks with Anomaly Detection using KSQL, Kafka Connect, Deep Learning and Elasticsearch](https://github.com/kaiwaehner/ksql-fork-with-deep-learning-function) +- TensorFlow Serving + gRPC + Kafka Streams on Github => Stream Processing and RPC / Request-Response concepts combined: [Model inference with Apache Kafka, Kafka Streams and a TensorFlow model deployed on a TensorFlow Serving model server](https://github.com/kaiwaehner/tensorflow-serving-java-grpc-kafka-streams) + +## Requirements, Installation and Usage +The code is developed and tested on Mac and Linux operating systems. As Kafka does not support Windows and does not work well on it, the code is not tested there at all. + +Java 8 and Maven 3 are required. Maven will download all required dependencies. + +Just download the project and run + + mvn clean package + +Apache Kafka 2.1 is currently used. The code is also compatible with Kafka and Kafka Streams 1.1 and 2.0. + +**Please make sure to run the Maven build without any changes first.** If it works without errors, you can change library versions, Java version, etc. and see if it still works or if you need to adjust code. + +Every example includes an implementation and a unit test. The examples are very simple and lightweight. No further configuration is needed to build and run them. Though, for this reason, the generated models are also included (and increase the download size of the project). 
+ +The unit tests use some Kafka helper classes like EmbeddedSingleNodeKafkaCluster in package **com.github.megachucky.kafka.streams.machinelearning.test.utils** so that you can run them without any other configuration or Kafka setup. +If you want to run an implementation of a main class in package **com.github.megachucky.kafka.streams.machinelearning**, you need to start a Kafka cluster (with at least one Zookeeper and one Kafka broker running) and also create the required topics. So check out the unit tests first. + + +### Example 1 - Gradient Boosting with H2O.ai for Prediction of Flight Delays + +**Use Case** + +Gradient Boosting Method (GBM) to predict flight delays. +An H2O-generated GBM Java model (POJO) is instantiated and used in a Kafka Streams application to do inference on new events. + +**Machine Learning Technology** + +* [H2O](https://www.h2o.ai) +* Check the [H2O demo](https://github.com/h2oai/h2o-2/wiki/Hacking-Airline-DataSet-with-H2O) to understand the test and how the model was built +* You can re-use the generated Java model attached to this project ([gbm_pojo_test.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/gbm_pojo_test.java)) or build your own model using R, Python, Flow UI or any other technologies supported by the H2O framework. 
+ +**Source Code** + +[MachineLearning_H2O_Example.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java) + +**Unit Test** + +[MachineLearning_H2O_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java) + +**Manual Testing** + +You can easily test this by yourself. Here are the steps: +- Start Kafka, e.g. with Confluent CLI: + + confluent start kafka +- Create topics AirlineInputTopic and AirlineOutputTopic + + kafka-topics --zookeeper localhost:2181 --create --topic AirlineInputTopic --partitions 3 --replication-factor 1 + + kafka-topics --zookeeper localhost:2181 --create --topic AirlineOutputTopic --partitions 3 --replication-factor 1 +- Start the Kafka Streams app: + + java -cp target/kafka-streams-machine-learning-examples-1.0-SNAPSHOT-jar-with-dependencies.jar com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_MachineLearning_H2O_GBM_Example +- Send messages, e.g. with kafkacat: + + echo -e "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES" | kafkacat -b localhost:9092 -P -t AirlineInputTopic +- Consume predictions: + + kafka-console-consumer --bootstrap-server localhost:9092 --topic AirlineOutputTopic --from-beginning +- Find more details in the unit test... 
+ + +**H2O Deep Learning instead of H2O GBM Model** + +The project includes another example with similar code to use a [H2O Deep Learning model](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java) instead of H2O GBM Model: [Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java) +This shows how you can easily test or replace different analytic models for one use case, or even use them for A/B testing. + +### Example 2 - Convolutional Neural Network (CNN) with TensorFlow for Image Recognition +**Use Case** + +Convolutional Neural Network (CNN) for image recognition. +A prebuilt TensorFlow CNN model is instantiated and used in a Kafka Streams application to recognize new JPEG images. A Kafka Input Topic receives the location of a new image (another option would be to send the image in the Kafka message instead of just a link to it); the application infers the content of the picture via the TensorFlow model and sends the result to a Kafka Output Topic. + +**Machine Learning Technology** +* [TensorFlow](https://www.tensorflow.org/) +* Leverages [TensorFlow for Java](https://www.tensorflow.org/install/install_java). These APIs are particularly well-suited for loading models created in Python and executing them within a Java application. Please note: The Java API doesn't yet include convenience functions (which you might know from [Keras](https://keras.io/)), thus a private helper class is used in the example for construction and execution of the pre-built TensorFlow model. 
+* Check the official TensorFlow demo [LabelImage](https://github.com/kaiwaehner/tensorflow/blob/r1.3/tensorflow/java/src/main/java/org/tensorflow/examples/LabelImage.java) to understand this image recognition example +* You can re-use the pre-trained TensorFlow model attached to this project [tensorflow_inception_graph.pb](http://arxiv.org/abs/1512.00567) or add your own model. +* The 'images' folder contains images which were used for training the model (trained_airplane_1.jpg, trained_airplane_2.jpg, trained_butterfly.jpg) but also a new picture (new_airplane.jpg) which is not known by the model and uses a different resolution than the others. Feel free to add your own pictures (they need to be trained, see list of trained pictures in the file: imagenet_comp_graph_label_strings.txt), otherwise the model will return 'unknown'. + +**Source Code** + +[Kafka_Streams_TensorFlow_Image_Recognition_Example.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_Example.java) + +**Unit Test** + +[Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java) + +### Example 3 - Iris Prediction using a Neural Network with DeepLearning4J (DL4J) +**Use Case** + +Iris Species Prediction using a Neural Network. +This is a famous example: Prediction of the Iris Species - implemented with many different ML algorithms. Here I use DeepLearning4J (DL4J) to build a neural network using the Iris dataset. + +**Machine Learning Technology** +* [DeepLearning4J](https://deeplearning4j.org) +* Pretty simple example to demo how to build, save and load neural networks with DL4J. 
[MultiLayerNetwork](https://deeplearning4j.org/doc/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.html) and [INDArray](http://nd4j.org/doc/org/nd4j/linalg/api/ndarray/INDArray.html) are the key APIs to look at if you want to understand the details. +* The model is created via [DeepLearning4J_CSV_Model.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Model.java) and stored in the resources: [DL4J_Iris_Model.zip](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/tree/master/src/main/resources/generatedModels/DL4J). No need to re-train, just for reference. Kudos to Adam Gibson who created this example as part of the DL4J project. + +**Unit Test** +[Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java) + +### Example 4 - Python + Keras + TensorFlow + DeepLearning4j + +**Use Case** + +Development of an analytic model trained with Python, Keras and TensorFlow and deployment to Java and Kafka ecosystem. No business case, just a technical demonstration of a simple 'Hello World' Keras model. Feel free to replace the model with any other Keras model trained with your backend of choice. You just need to replace the model binary (and use a model which is compatible with DeepLearning4J 's model importer). + +**Machine Learning Technology** +* [Python](https://www.python.org/) +* [DeepLearning4J](https://deeplearning4j.org) +* [Keras](https://keras.io/) - a high-level neural networks API, written in Python and capable of running on top of TensorFlow, CNTK, or Theano. 
+* [TensorFlow](https://www.tensorflow.org/) - used as backend under the hood of Keras +* DeepLearning4J 's [KerasModelImport feature](https://deeplearning4j.org/docs/latest/keras-import-overview) is used for importing the Keras / TensorFlow model into Java. The used model is its 'Hello World' model example. +* The Keras model was trained with this [Python script](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/resources/generatedModels/Keras/keras-model-script.py). + +**Unit Test** + +[Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java) + + + + + + + diff --git a/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_Example.java b/TensorFlow_Image_Recognition/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_Example.java similarity index 100% rename from src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_Example.java rename to TensorFlow_Image_Recognition/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_Example.java diff --git a/src/main/resources/TensorFlow_Images/devil.png b/TensorFlow_Image_Recognition/src/main/resources/TensorFlow_Images/devil.png similarity index 100% rename from src/main/resources/TensorFlow_Images/devil.png rename to TensorFlow_Image_Recognition/src/main/resources/TensorFlow_Images/devil.png diff --git a/src/main/resources/TensorFlow_Images/new_airplane.jpg b/TensorFlow_Image_Recognition/src/main/resources/TensorFlow_Images/new_airplane.jpg similarity index 100% rename from src/main/resources/TensorFlow_Images/new_airplane.jpg rename to 
TensorFlow_Image_Recognition/src/main/resources/TensorFlow_Images/new_airplane.jpg diff --git a/src/main/resources/TensorFlow_Images/trained_airplane_1.jpg b/TensorFlow_Image_Recognition/src/main/resources/TensorFlow_Images/trained_airplane_1.jpg similarity index 100% rename from src/main/resources/TensorFlow_Images/trained_airplane_1.jpg rename to TensorFlow_Image_Recognition/src/main/resources/TensorFlow_Images/trained_airplane_1.jpg diff --git a/src/main/resources/TensorFlow_Images/trained_airplane_2.jpg b/TensorFlow_Image_Recognition/src/main/resources/TensorFlow_Images/trained_airplane_2.jpg similarity index 100% rename from src/main/resources/TensorFlow_Images/trained_airplane_2.jpg rename to TensorFlow_Image_Recognition/src/main/resources/TensorFlow_Images/trained_airplane_2.jpg diff --git a/src/main/resources/TensorFlow_Images/trained_butterfly.jpg b/TensorFlow_Image_Recognition/src/main/resources/TensorFlow_Images/trained_butterfly.jpg similarity index 100% rename from src/main/resources/TensorFlow_Images/trained_butterfly.jpg rename to TensorFlow_Image_Recognition/src/main/resources/TensorFlow_Images/trained_butterfly.jpg diff --git a/src/main/resources/generatedModels/CNN_inception5h/LICENSE b/TensorFlow_Image_Recognition/src/main/resources/generatedModels/CNN_inception5h/LICENSE similarity index 100% rename from src/main/resources/generatedModels/CNN_inception5h/LICENSE rename to TensorFlow_Image_Recognition/src/main/resources/generatedModels/CNN_inception5h/LICENSE diff --git a/src/main/resources/generatedModels/CNN_inception5h/imagenet_comp_graph_label_strings.txt b/TensorFlow_Image_Recognition/src/main/resources/generatedModels/CNN_inception5h/imagenet_comp_graph_label_strings.txt similarity index 100% rename from src/main/resources/generatedModels/CNN_inception5h/imagenet_comp_graph_label_strings.txt rename to TensorFlow_Image_Recognition/src/main/resources/generatedModels/CNN_inception5h/imagenet_comp_graph_label_strings.txt diff --git 
a/src/main/resources/generatedModels/CNN_inception5h/tensorflow_inception_graph.pb b/TensorFlow_Image_Recognition/src/main/resources/generatedModels/CNN_inception5h/tensorflow_inception_graph.pb similarity index 100% rename from src/main/resources/generatedModels/CNN_inception5h/tensorflow_inception_graph.pb rename to TensorFlow_Image_Recognition/src/main/resources/generatedModels/CNN_inception5h/tensorflow_inception_graph.pb diff --git a/src/main/resources/generatedModels/TensorFlow_Census/saved_model.pb b/TensorFlow_Image_Recognition/src/main/resources/generatedModels/TensorFlow_Census/saved_model.pb similarity index 100% rename from src/main/resources/generatedModels/TensorFlow_Census/saved_model.pb rename to TensorFlow_Image_Recognition/src/main/resources/generatedModels/TensorFlow_Census/saved_model.pb diff --git a/src/main/resources/generatedModels/TensorFlow_Census/test.json b/TensorFlow_Image_Recognition/src/main/resources/generatedModels/TensorFlow_Census/test.json old mode 100755 new mode 100644 similarity index 100% rename from src/main/resources/generatedModels/TensorFlow_Census/test.json rename to TensorFlow_Image_Recognition/src/main/resources/generatedModels/TensorFlow_Census/test.json diff --git a/src/main/resources/generatedModels/TensorFlow_Census/variables/variables.data-00000-of-00001 b/TensorFlow_Image_Recognition/src/main/resources/generatedModels/TensorFlow_Census/variables/variables.data-00000-of-00001 similarity index 100% rename from src/main/resources/generatedModels/TensorFlow_Census/variables/variables.data-00000-of-00001 rename to TensorFlow_Image_Recognition/src/main/resources/generatedModels/TensorFlow_Census/variables/variables.data-00000-of-00001 diff --git a/src/main/resources/generatedModels/TensorFlow_Census/variables/variables.index b/TensorFlow_Image_Recognition/src/main/resources/generatedModels/TensorFlow_Census/variables/variables.index similarity index 100% rename from 
src/main/resources/generatedModels/TensorFlow_Census/variables/variables.index rename to TensorFlow_Image_Recognition/src/main/resources/generatedModels/TensorFlow_Census/variables/variables.index diff --git a/TensorFlow_Image_Recognition/src/main/resources/log4j.properties b/TensorFlow_Image_Recognition/src/main/resources/log4j.properties new file mode 100644 index 0000000..d511cbd --- /dev/null +++ b/TensorFlow_Image_Recognition/src/main/resources/log4j.properties @@ -0,0 +1,5 @@ +log4j.rootLogger=INFO, stdout + +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%p %m (%c:%L) %n \ No newline at end of file diff --git a/TensorFlow_Image_Recognition/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java b/TensorFlow_Image_Recognition/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java new file mode 100644 index 0000000..d3c7b18 --- /dev/null +++ b/TensorFlow_Image_Recognition/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java @@ -0,0 +1,45 @@ +package com.github.jukkakarvanen.kafka.streams.integration.utils; + +import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Properties; + +/** This is helper class to workaround for Failing stream tests in Windows environment KAFKA-6647. 
+ * + * @author Jukka Karvanen + * + * The causing issue is https://issues.apache.org/jira/browse/KAFKA-6647 + * Replacing EmbeddedKafkaCluster with TestEmbeddedKafkaCluster will catch and ignore the exception + * happening during the tear down of the test + * The exception does not have affect to functionality + */ + +public class TestEmbeddedKafkaCluster extends EmbeddedKafkaCluster { + private static final Logger log = LoggerFactory.getLogger(TestEmbeddedKafkaCluster.class); + + public TestEmbeddedKafkaCluster(int numBrokers) { + super(numBrokers); + } + + public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig) { + super(numBrokers, brokerConfig); + } + + public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig, long mockTimeMillisStart) { + super(numBrokers, brokerConfig, mockTimeMillisStart); + } + + public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig, long mockTimeMillisStart, long mockTimeNanoStart) { + super(numBrokers, brokerConfig, mockTimeMillisStart, mockTimeNanoStart); + } + + public void after() { + try { + super.after(); + } catch (RuntimeException e) { + log.warn("Ignoring exception, test failing in Windows due this exception {}", e); + } + } +} diff --git a/TensorFlow_Image_Recognition/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java b/TensorFlow_Image_Recognition/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java new file mode 100644 index 0000000..ef2bc4e --- /dev/null +++ b/TensorFlow_Image_Recognition/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java @@ -0,0 +1,47 @@ +package com.github.jukkakarvanen.kafka.streams.integration.utils; + +import org.apache.kafka.common.utils.Time; +import org.apache.kafka.streams.KafkaClientSupplier; +import org.apache.kafka.streams.KafkaStreams; +import org.apache.kafka.streams.Topology; +import org.slf4j.Logger; +import 
org.slf4j.LoggerFactory; + +import java.util.Properties; + +/** This is helper class to workaround for Failing stream tests in Windows environment KAFKA-6647. + * + * @author Jukka Karvanen + * + * The causing issue is https://issues.apache.org/jira/browse/KAFKA-6647 + * Replacing KafkaStreams with TestKafkaStreams will catch and ignore the exception caused by cleanUp + * The exception does not have affect to functionality + */ + +public class TestKafkaStreams extends KafkaStreams { + private static final Logger log = LoggerFactory.getLogger(TestKafkaStreams.class); + + public TestKafkaStreams(Topology topology, Properties props) { + super(topology, props); + } + + public TestKafkaStreams(Topology topology, Properties props, KafkaClientSupplier clientSupplier) { + super(topology, props, clientSupplier); + } + + public TestKafkaStreams(Topology topology, Properties props, Time time) { + super(topology, props, time); + } + + public TestKafkaStreams(Topology topology, Properties props, KafkaClientSupplier clientSupplier, Time time) { + super(topology, props, clientSupplier, time); + } + + public void cleanUp() { + try { + super.cleanUp(); + } catch (RuntimeException e) { + log.warn("Ignoring exception, test failing in Windows due this exception {}", e); + } + } +} diff --git a/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java b/TensorFlow_Image_Recognition/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java similarity index 100% rename from src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java rename to TensorFlow_Image_Recognition/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java diff --git 
a/TensorFlow_Keras/pom.xml b/TensorFlow_Keras/pom.xml new file mode 100644 index 0000000..501a7df --- /dev/null +++ b/TensorFlow_Keras/pom.xml @@ -0,0 +1,180 @@ + + + 4.0.0 + + com.github.kaiwaehner.kafka.streams.machinelearning + tensorflow-keras + CP51_AK21 + + + + confluent + http://packages.confluent.io/maven/ + + + + + 1.8 + 2.1.0 + 2.11 + ${kafka.scala.version}.8 + 5.1.0 + UTF-8 + + + + + + + + org.apache.kafka + kafka-streams + ${kafka.version} + + + + + + + + org.nd4j + nd4j-native-platform + 1.0.0-beta3 + + + + + + org.deeplearning4j + deeplearning4j-core + 1.0.0-beta3 + + + + org.deeplearning4j + deeplearning4j-modelimport + 1.0.0-beta3 + + + + + + org.tensorflow + tensorflow + 1.3.0 + + + + + + ai.h2o + h2o-genmodel + 3.14.0.1 + + + + + + junit + junit + 4.12 + test + + + org.assertj + assertj-core + 3.3.0 + test + + + org.apache.kafka + kafka_${kafka.scala.version} + ${kafka.version} + test + test + + + org.apache.kafka + kafka-clients + ${kafka.version} + test + test + + + org.apache.kafka + kafka-streams + ${kafka.version} + test + test + + + org.apache.curator + curator-test + 2.9.0 + test + + + io.confluent + kafka-schema-registry + ${confluent.version} + test + + + io.confluent + kafka-schema-registry + ${confluent.version} + + tests + test + + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.6.1 + + 1.8 + 1.8 + + + + + + org.apache.maven.plugins + maven-assembly-plugin + 2.5.2 + + + jar-with-dependencies + + + + true + com.github.megachucky.kafka.streams.machinelearning.StreamsStarterApp + + + + + + assemble-all + package + + single + + + + + + + diff --git a/TensorFlow_Keras/readme.md b/TensorFlow_Keras/readme.md new file mode 100644 index 0000000..e684ab6 --- /dev/null +++ b/TensorFlow_Keras/readme.md @@ -0,0 +1,168 @@ +# Machine Learning + Kafka Streams Examples + +This project contains **examples which demonstrate how to deploy analytic models to mission-critical, scalable production leveraging [Apache 
Kafka](https://kafka.apache.org/) and its [Streams API](https://docs.confluent.io/current/streams/index.html).** +Examples will include analytic models built with TensorFlow, Keras, H2O, Python, DeepLearning4J and other technologies. + +![Kafka Open Source Ecosystem for a Scalable Mission Critical Machine Learning Infrastructure](http://www.kai-waehner.de/blog/wp-content/uploads/2017/10/Apache_Kafka_Ecosystem_Kafka_Streams_Machine_Learning.png "Kafka Open Source Ecosystem for a Scalable Mission Critical Machine Learning Infrastructure") + + +## Material (Blog Posts, Slides, Videos) +Here is some material about this topic if you want to read and listen to the theory instead of just doing hands-on: +- Blog Post: [How to Build and Deploy Scalable Machine Learning in Production with Apache Kafka](https://www.confluent.io/blog/build-deploy-scalable-machine-learning-production-apache-kafka/) +- Slide Deck: [Apache Kafka + Machine Learning => Intelligent Real Time Applications](https://www.slideshare.net/KaiWaehner/apache-kafka-streams-machine-learning-deep-learning) +- Slide Deck: [Deep Learning at Extreme Scale (in the Cloud)
with the Apache Kafka Open Source Ecosystem](https://www.slideshare.net/KaiWaehner/deep-learning-at-extreme-scale-in-the-cloud-with-the-apache-kafka-open-source-ecosystem) +- Video Recording: [Deep Learning in Mission Critical and Scalable Real Time Applications with Open Source Frameworks](https://vimeo.com/jaxtv/review/256406763/7fbf4213be) +- Blog Post: [Using Apache Kafka to Drive Cutting-Edge Machine Learning - Hybrid ML Architectures, AutoML, and more...](https://www.confluent.io/blog/using-apache-kafka-drive-cutting-edge-machine-learning) +- Blog Post: [Machine Learning with Python, Jupyter, KSQL and TensorFlow](https://www.confluent.io/blog/machine-learning-with-python-jupyter-ksql-tensorflow) + +## Use Cases and Technologies + +##### The following examples are already available including unit tests: + +* Deployment of a H2O GBM model to a Kafka Streams application for prediction of flight delays +* Deployment of a H2O Deep Learning model to a Kafka Streams application for prediction of flight delays +* Deployment of a pre-built TensorFlow CNN model for image recognition +* Deployment of a DL4J model to predict the species of Iris flowers +* Deployment of a Keras model (trained with TensorFlow backend) using the Import Model API from DeepLearning4J + +**More sophisticated use cases around Kafka Streams and other technologies will be added over time in this or related Github project. Some ideas**: + +* Image Recognition with H2O and TensorFlow (to show the difference of using H2O instead of using just low level TensorFlow APIs) +* Anomaly Detection with Autoencoders leveraging DeepLearning4J. +* Cross Selling and Customer Churn Detection using classical Machine Learning algorithms but also Deep Learning +* Stateful Stream Processing to combine different model execution steps into a more powerful workflow instead of "just" inferencing single events (a good example might be a streaming process with sliding or session windows). 
+* Keras to build different models with Python, TensorFlow, Theano and other Deep Learning frameworks under the hood + Kafka Streams as generic Machine Learning infrastructure to deploy, execute and monitor these different models. + +##### Some other Github projects exist already with more ML + Kafka content: + +- Deep Learning UDF for KSQL: [Streaming Anomaly Detection of MQTT IoT Sensor Data using an Autoencoder](https://github.com/kaiwaehner/ksql-udf-deep-learning-mqtt-iot) +- End-to-End ML Integration Demo: [Continuous Health Checks with Anomaly Detection using KSQL, Kafka Connect, Deep Learning and Elasticsearch](https://github.com/kaiwaehner/ksql-fork-with-deep-learning-function) +- TensorFlow Serving + gRPC + Kafka Streams on Github => Stream Processing and RPC / Request-Response concepts combined: [Model inference with Apache Kafka, Kafka Streams and a TensorFlow model deployed on a TensorFlow Serving model server](https://github.com/kaiwaehner/tensorflow-serving-java-grpc-kafka-streams) + +## Requirements, Installation and Usage +The code is developed and tested on Mac and Linux operating systems. As Kafka is not supported on Windows and does not work well there, the code is not tested on Windows at all. + +Java 8 and Maven 3 are required. Maven will download all required dependencies. + +Just download the project and run + + mvn clean package + +Apache Kafka 2.1 is currently used. The code is also compatible with Kafka and Kafka Streams 1.1 and 2.0. + +**Please make sure to run the Maven build without any changes first.** If it works without errors, you can change library versions, Java version, etc. and see if it still works or if you need to adjust code. + +Every example includes an implementation and a unit test. The examples are very simple and lightweight. No further configuration is needed to build and run them. Though, for this reason, the generated models are also included (and increase the download size of the project).
+ +The unit tests use some Kafka helper classes like EmbeddedSingleNodeKafkaCluster in package **com.github.megachucky.kafka.streams.machinelearning.test.utils** so that you can run them without any other configuration or Kafka setup. +If you want to run an implementation of a main class in package **com.github.megachucky.kafka.streams.machinelearning**, you need to start a Kafka cluster (with at least one Zookeeper and one Kafka broker running) and also create the required topics. So check out the unit tests first. + + +### Example 1 - Gradient Boosting with H2O.ai for Prediction of Flight Delays + +**Use Case** + +Gradient Boosting Method (GBM) to predict flight delays. +An H2O-generated GBM Java model (POJO) is instantiated and used in a Kafka Streams application to do inference on new events. + +**Machine Learning Technology** + +* [H2O](https://www.h2o.ai) +* Check the [H2O demo](https://github.com/h2oai/h2o-2/wiki/Hacking-Airline-DataSet-with-H2O) to understand the test and how the model was built +* You can re-use the generated Java model attached to this project ([gbm_pojo_test.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/gbm_pojo_test.java)) or build your own model using R, Python, Flow UI or any other technologies supported by the H2O framework.
+ +**Source Code** + +[MachineLearning_H2O_Example.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java) + +**Unit Test** + +[MachineLearning_H2O_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java) + +**Manual Testing** + +You can easily test this by yourself. Here are the steps: +- Start Kafka, e.g. with Confluent CLI: + + confluent start kafka +- Create topics AirlineInputTopic and AirlineOutputTopic + + kafka-topics --zookeeper localhost:2181 --create --topic AirlineInputTopic --partitions 3 --replication-factor 1 + + kafka-topics --zookeeper localhost:2181 --create --topic AirlineOutputTopic --partitions 3 --replication-factor 1 +- Start the Kafka Streams app: + + java -cp target/kafka-streams-machine-learning-examples-1.0-SNAPSHOT-jar-with-dependencies.jar com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_MachineLearning_H2O_GBM_Example +- Send messages, e.g. with kafkacat: + + echo -e "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES" | kafkacat -b localhost:9092 -P -t AirlineInputTopic +- Consume predictions: + + kafka-console-consumer --bootstrap-server localhost:9092 --topic AirlineOutputTopic --from-beginning +- Find more details in the unit test... 
+ + +**H2O Deep Learning instead of H2O GBM Model** + +The project includes another example with similar code to use a [H2O Deep Learning model](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java) instead of H2O GBM Model: [Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java) +This shows how you can easily test or replace different analytic models for one use case, or even use them for A/B testing. + +### Example 2 - Convolutional Neural Network (CNN) with TensorFlow for Image Recognition +**Use Case** + +Convolutional Neural Network (CNN) for image recognition. +A prebuilt TensorFlow CNN model is instantiated and used in a Kafka Streams application to recognize new JPEG images. A Kafka Input Topic receives the location of a new image (another option would be to send the image in the Kafka message instead of just a link to it); the application infers the content of the picture via the TensorFlow model and sends the result to a Kafka Output Topic. + +**Machine Learning Technology** +* [TensorFlow](https://www.tensorflow.org/) +* Leverages [TensorFlow for Java](https://www.tensorflow.org/install/install_java). These APIs are particularly well-suited for loading models created in Python and executing them within a Java application. Please note: The Java API doesn't yet include convenience functions (which you might know from [Keras](https://keras.io/)), thus a private helper class is used in the example for construction and execution of the pre-built TensorFlow model.
+* Check the official TensorFlow demo [LabelImage](https://github.com/kaiwaehner/tensorflow/blob/r1.3/tensorflow/java/src/main/java/org/tensorflow/examples/LabelImage.java) to understand this image recognition example +* You can re-use the pre-trained TensorFlow model attached to this project [tensorflow_inception_graph.pb](http://arxiv.org/abs/1512.00567) or add your own model. +* The 'images' folder contains images which were used for training the model (trained_airplane_1.jpg, trained_airplane_2.jpg, trained_butterfly.jpg) but also a new picture (new_airplane.jpg) which is not known by the model and uses a different resolution than the others. Feel free to add your own pictures (they need to be trained, see list of trained pictures in the file: imagenet_comp_graph_label_strings.txt), otherwise the model will return 'unknown'. + +**Source Code** + +[Kafka_Streams_TensorFlow_Image_Recognition_Example.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_Example.java) + +**Unit Test** + +[Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java) + +### Example 3 - Iris Prediction using a Neural Network with DeepLearning4J (DL4J) +**Use Case** + +Iris Species Prediction using a Neural Network. +This is a famous example: Prediction of the Iris Species - implemented with many different ML algorithms. Here I use DeepLearning4J (DL4J) to build a neural network using the Iris dataset. + +**Machine Learning Technology** +* [DeepLearning4J](https://deeplearning4j.org) +* Pretty simple example to demo how to build, save and load neural networks with DL4J.
[MultiLayerNetwork](https://deeplearning4j.org/doc/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.html) and [INDArray](http://nd4j.org/doc/org/nd4j/linalg/api/ndarray/INDArray.html) are the key APIs to look at if you want to understand the details. +* The model is created via [DeepLearning4J_CSV_Model.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Model.java) and stored in the resources: [DL4J_Iris_Model.zip](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/tree/master/src/main/resources/generatedModels/DL4J). No need to re-train, just for reference. Kudos to Adam Gibson who created this example as part of the DL4J project. + +**Unit Test** +[Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java) + +### Example 4 - Python + Keras + TensorFlow + DeepLearning4j + +**Use Case** + +Development of an analytic model trained with Python, Keras and TensorFlow and deployment to Java and Kafka ecosystem. No business case, just a technical demonstration of a simple 'Hello World' Keras model. Feel free to replace the model with any other Keras model trained with your backend of choice. You just need to replace the model binary (and use a model which is compatible with DeepLearning4J 's model importer). + +**Machine Learning Technology** +* [Python](https://www.python.org/) +* [DeepLearning4J](https://deeplearning4j.org) +* [Keras](https://keras.io/) - a high-level neural networks API, written in Python and capable of running on top of TensorFlow, CNTK, or Theano. 
+* [TensorFlow](https://www.tensorflow.org/) - used as backend under the hood of Keras +* DeepLearning4J 's [KerasModelImport feature](https://deeplearning4j.org/docs/latest/keras-import-overview) is used for importing the Keras / TensorFlow model into Java. The used model is its 'Hello World' model example. +* The Keras model was trained with this [Python script](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/resources/generatedModels/Keras/keras-model-script.py). + +**Unit Test** + +[Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java) + + + + + + + diff --git a/src/main/resources/generatedModels/Keras/keras-model-script.py b/TensorFlow_Keras/src/main/resources/generatedModels/Keras/keras-model-script.py similarity index 100% rename from src/main/resources/generatedModels/Keras/keras-model-script.py rename to TensorFlow_Keras/src/main/resources/generatedModels/Keras/keras-model-script.py diff --git a/src/main/resources/generatedModels/Keras/simple_mlp.h5 b/TensorFlow_Keras/src/main/resources/generatedModels/Keras/simple_mlp.h5 similarity index 100% rename from src/main/resources/generatedModels/Keras/simple_mlp.h5 rename to TensorFlow_Keras/src/main/resources/generatedModels/Keras/simple_mlp.h5 diff --git a/TensorFlow_Keras/src/main/resources/log4j.properties b/TensorFlow_Keras/src/main/resources/log4j.properties new file mode 100644 index 0000000..d511cbd --- /dev/null +++ b/TensorFlow_Keras/src/main/resources/log4j.properties @@ -0,0 +1,5 @@ +log4j.rootLogger=INFO, stdout + +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%p %m (%c:%L) %n \ No newline at end of file diff --git 
a/TensorFlow_Keras/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java b/TensorFlow_Keras/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java new file mode 100644 index 0000000..d3c7b18 --- /dev/null +++ b/TensorFlow_Keras/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java @@ -0,0 +1,45 @@ +package com.github.jukkakarvanen.kafka.streams.integration.utils; + +import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Properties; + +/** Test-only EmbeddedKafkaCluster subclass that works around stream tests failing on Windows (KAFKA-6647). + * + * @author Jukka Karvanen + * + * The underlying issue is https://issues.apache.org/jira/browse/KAFKA-6647 + * Using TestEmbeddedKafkaCluster instead of EmbeddedKafkaCluster makes after() catch the RuntimeException + * thrown by super.after() during test tear-down and log it at WARN level instead of propagating it. + * The exception does not affect test functionality. + */ + +public class TestEmbeddedKafkaCluster extends EmbeddedKafkaCluster { + private static final Logger log = LoggerFactory.getLogger(TestEmbeddedKafkaCluster.class); + + public TestEmbeddedKafkaCluster(int numBrokers) { + super(numBrokers); + } + + public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig) { + super(numBrokers, brokerConfig); + } + + public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig, long mockTimeMillisStart) { + super(numBrokers, brokerConfig, mockTimeMillisStart); + } + + public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig, long mockTimeMillisStart, long mockTimeNanoStart) { + super(numBrokers, brokerConfig, mockTimeMillisStart, mockTimeNanoStart); + } + + @Override public void after() { /* best effort: a failure in super.after() is logged, not rethrown */ + try { + super.after(); + } catch (RuntimeException e) { + log.warn("Ignoring exception, test failing in Windows due this exception", e); /* no {} placeholder: SLF4J treats the trailing Throwable as the exception to log */ + }
+ } +} diff --git a/TensorFlow_Keras/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java b/TensorFlow_Keras/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java new file mode 100644 index 0000000..ef2bc4e --- /dev/null +++ b/TensorFlow_Keras/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java @@ -0,0 +1,47 @@ +package com.github.jukkakarvanen.kafka.streams.integration.utils; + +import org.apache.kafka.common.utils.Time; +import org.apache.kafka.streams.KafkaClientSupplier; +import org.apache.kafka.streams.KafkaStreams; +import org.apache.kafka.streams.Topology; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Properties; + +/** Test-only KafkaStreams subclass that works around stream tests failing on Windows (KAFKA-6647). + * + * @author Jukka Karvanen + * + * The underlying issue is https://issues.apache.org/jira/browse/KAFKA-6647 + * Using TestKafkaStreams instead of KafkaStreams makes cleanUp() catch the RuntimeException thrown by + * super.cleanUp() and log it at WARN level; the exception does not affect test functionality. + */ + +public class TestKafkaStreams extends KafkaStreams { + private static final Logger log = LoggerFactory.getLogger(TestKafkaStreams.class); + + public TestKafkaStreams(Topology topology, Properties props) { + super(topology, props); + } + + public TestKafkaStreams(Topology topology, Properties props, KafkaClientSupplier clientSupplier) { + super(topology, props, clientSupplier); + } + + public TestKafkaStreams(Topology topology, Properties props, Time time) { + super(topology, props, time); + } + + public TestKafkaStreams(Topology topology, Properties props, KafkaClientSupplier clientSupplier, Time time) { + super(topology, props, clientSupplier, time); + } + + @Override public void cleanUp() { /* best effort: a failure in super.cleanUp() is logged, not rethrown */ + try { + super.cleanUp(); + } catch (RuntimeException e) { + log.warn("Ignoring exception, test failing in Windows due this exception", e); /* no {} placeholder: SLF4J treats the trailing Throwable as the exception to log */ + }
+ } +} diff --git a/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java b/TensorFlow_Keras/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java similarity index 100% rename from src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java rename to TensorFlow_Keras/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java From 306e688fc69a2f0ecec2a3373cb6dc97c263aa48 Mon Sep 17 00:00:00 2001 From: Jukka Karvanen <48978068+jukkakarvanen@users.noreply.github.com> Date: Thu, 28 Mar 2019 17:13:39 +0200 Subject: [PATCH 04/20] maven modules to compile all --- .../pom.xml | 2 +- .../readme.md | 0 .../models/DeepLearning4J_CSV_Iris_Model.java | 0 .../DeepLearning4J_CSV_Model_Inference.java | 0 .../main/resources/DL4J_Resources/iris.txt | 0 .../generatedModels/DL4J/DL4J_Iris_Model.zip | Bin .../src/main/resources/log4j.properties | 0 .../utils/TestEmbeddedKafkaCluster.java | 0 .../integration/utils/TestKafkaStreams.java | 0 ...L4J_DeepLearning_Iris_IntegrationTest.java | 0 .../pom.xml | 0 .../readme.md | 0 ...hineLearning_H2O_DeepLearning_Example.java | 0 ...reams_MachineLearning_H2O_GBM_Example.java | 0 ...neLearning_H2O_GBM_Example_Refactored.java | 0 ..._fe7c1f02_08ec_4070_b784_c2531147e451.java | 0 .../machinelearning/models/gbm_pojo_test.java | 0 ...DeepWater_model_python_1503570558230_1.zip | Bin .../GBM_model_python_1503397740678_1.zip | Bin .../src/main/resources/log4j.properties | 0 .../utils/TestEmbeddedKafkaCluster.java | 0 .../integration/utils/TestKafkaStreams.java | 0 ..._DeepLearning_Example_IntegrationTest.java | 0 ...rning_H2O_GBM_Example_IntegrationTest.java | 0 pom.xml | 183 ++---------------- .../pom.xml | 0 .../readme.md | 0 
..._TensorFlow_Image_Recognition_Example.java | 0 .../resources/TensorFlow_Images/devil.png | Bin .../TensorFlow_Images/new_airplane.jpg | Bin .../TensorFlow_Images/trained_airplane_1.jpg | Bin .../TensorFlow_Images/trained_airplane_2.jpg | Bin .../TensorFlow_Images/trained_butterfly.jpg | Bin .../generatedModels/CNN_inception5h/LICENSE | 0 .../imagenet_comp_graph_label_strings.txt | 0 .../tensorflow_inception_graph.pb | Bin .../TensorFlow_Census/saved_model.pb | Bin .../TensorFlow_Census/test.json | 0 .../variables/variables.data-00000-of-00001 | Bin .../variables/variables.index | Bin .../src/main/resources/log4j.properties | 0 .../utils/TestEmbeddedKafkaCluster.java | 0 .../integration/utils/TestKafkaStreams.java | 0 ...e_Recognition_Example_IntegrationTest.java | 0 .../pom.xml | 0 .../readme.md | 0 .../Keras/keras-model-script.py | 0 .../generatedModels/Keras/simple_mlp.h5 | Bin .../src/main/resources/log4j.properties | 0 .../utils/TestEmbeddedKafkaCluster.java | 0 .../integration/utils/TestKafkaStreams.java | 0 ...sorFlow_Keras_Example_IntegrationTest.java | 0 52 files changed, 13 insertions(+), 172 deletions(-) rename {DL4J_DeepLearning_Iris => dl4j-deeplearning-iris}/pom.xml (99%) rename {DL4J_DeepLearning_Iris => dl4j-deeplearning-iris}/readme.md (100%) rename {DL4J_DeepLearning_Iris => dl4j-deeplearning-iris}/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Iris_Model.java (100%) rename {DL4J_DeepLearning_Iris => dl4j-deeplearning-iris}/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Model_Inference.java (100%) rename {DL4J_DeepLearning_Iris => dl4j-deeplearning-iris}/src/main/resources/DL4J_Resources/iris.txt (100%) rename {DL4J_DeepLearning_Iris => dl4j-deeplearning-iris}/src/main/resources/generatedModels/DL4J/DL4J_Iris_Model.zip (100%) rename {DL4J_DeepLearning_Iris => dl4j-deeplearning-iris}/src/main/resources/log4j.properties (100%) rename {DL4J_DeepLearning_Iris 
=> dl4j-deeplearning-iris}/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java (100%) rename {DL4J_DeepLearning_Iris => dl4j-deeplearning-iris}/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java (100%) rename {DL4J_DeepLearning_Iris => dl4j-deeplearning-iris}/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java (100%) rename {H2O_DeepLearning => h20-deeplearning}/pom.xml (100%) rename {H2O_DeepLearning => h20-deeplearning}/readme.md (100%) rename {H2O_DeepLearning => h20-deeplearning}/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java (100%) rename {H2O_DeepLearning => h20-deeplearning}/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java (100%) rename {H2O_DeepLearning => h20-deeplearning}/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example_Refactored.java (100%) rename {H2O_DeepLearning => h20-deeplearning}/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java (100%) rename {H2O_DeepLearning => h20-deeplearning}/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/gbm_pojo_test.java (100%) rename {H2O_DeepLearning => h20-deeplearning}/src/main/resources/generatedModels/DeepWater_model_python_1503570558230_1.zip (100%) rename {H2O_DeepLearning => h20-deeplearning}/src/main/resources/generatedModels/GBM_model_python_1503397740678_1.zip (100%) rename {H2O_DeepLearning => h20-deeplearning}/src/main/resources/log4j.properties (100%) rename {H2O_DeepLearning => h20-deeplearning}/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java (100%) rename 
{H2O_DeepLearning => h20-deeplearning}/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java (100%) rename {H2O_DeepLearning => h20-deeplearning}/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java (100%) rename {H2O_DeepLearning => h20-deeplearning}/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java (100%) rename {TensorFlow_Image_Recognition => tensorflow-image-recognition}/pom.xml (100%) rename {TensorFlow_Image_Recognition => tensorflow-image-recognition}/readme.md (100%) rename {TensorFlow_Image_Recognition => tensorflow-image-recognition}/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_Example.java (100%) rename {TensorFlow_Image_Recognition => tensorflow-image-recognition}/src/main/resources/TensorFlow_Images/devil.png (100%) rename {TensorFlow_Image_Recognition => tensorflow-image-recognition}/src/main/resources/TensorFlow_Images/new_airplane.jpg (100%) rename {TensorFlow_Image_Recognition => tensorflow-image-recognition}/src/main/resources/TensorFlow_Images/trained_airplane_1.jpg (100%) rename {TensorFlow_Image_Recognition => tensorflow-image-recognition}/src/main/resources/TensorFlow_Images/trained_airplane_2.jpg (100%) rename {TensorFlow_Image_Recognition => tensorflow-image-recognition}/src/main/resources/TensorFlow_Images/trained_butterfly.jpg (100%) rename {TensorFlow_Image_Recognition => tensorflow-image-recognition}/src/main/resources/generatedModels/CNN_inception5h/LICENSE (100%) rename {TensorFlow_Image_Recognition => tensorflow-image-recognition}/src/main/resources/generatedModels/CNN_inception5h/imagenet_comp_graph_label_strings.txt (100%) rename {TensorFlow_Image_Recognition => 
tensorflow-image-recognition}/src/main/resources/generatedModels/CNN_inception5h/tensorflow_inception_graph.pb (100%) rename {TensorFlow_Image_Recognition => tensorflow-image-recognition}/src/main/resources/generatedModels/TensorFlow_Census/saved_model.pb (100%) rename {TensorFlow_Image_Recognition => tensorflow-image-recognition}/src/main/resources/generatedModels/TensorFlow_Census/test.json (100%) rename {TensorFlow_Image_Recognition => tensorflow-image-recognition}/src/main/resources/generatedModels/TensorFlow_Census/variables/variables.data-00000-of-00001 (100%) rename {TensorFlow_Image_Recognition => tensorflow-image-recognition}/src/main/resources/generatedModels/TensorFlow_Census/variables/variables.index (100%) rename {TensorFlow_Image_Recognition => tensorflow-image-recognition}/src/main/resources/log4j.properties (100%) rename {TensorFlow_Image_Recognition => tensorflow-image-recognition}/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java (100%) rename {TensorFlow_Image_Recognition => tensorflow-image-recognition}/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java (100%) rename {TensorFlow_Image_Recognition => tensorflow-image-recognition}/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java (100%) rename {TensorFlow_Keras => tensorflow-keras}/pom.xml (100%) rename {TensorFlow_Keras => tensorflow-keras}/readme.md (100%) rename {TensorFlow_Keras => tensorflow-keras}/src/main/resources/generatedModels/Keras/keras-model-script.py (100%) rename {TensorFlow_Keras => tensorflow-keras}/src/main/resources/generatedModels/Keras/simple_mlp.h5 (100%) rename {TensorFlow_Keras => tensorflow-keras}/src/main/resources/log4j.properties (100%) rename {TensorFlow_Keras => tensorflow-keras}/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java 
(100%) rename {TensorFlow_Keras => tensorflow-keras}/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java (100%) rename {TensorFlow_Keras => tensorflow-keras}/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java (100%) diff --git a/DL4J_DeepLearning_Iris/pom.xml b/dl4j-deeplearning-iris/pom.xml similarity index 99% rename from DL4J_DeepLearning_Iris/pom.xml rename to dl4j-deeplearning-iris/pom.xml index 85f7099..7dfb917 100644 --- a/DL4J_DeepLearning_Iris/pom.xml +++ b/dl4j-deeplearning-iris/pom.xml @@ -4,7 +4,7 @@ 4.0.0 com.github.kaiwaehner.kafka.streams.machinelearning - dl4j-deepLearning-iris + dl4j-deeplearning-iris CP51_AK21 diff --git a/DL4J_DeepLearning_Iris/readme.md b/dl4j-deeplearning-iris/readme.md similarity index 100% rename from DL4J_DeepLearning_Iris/readme.md rename to dl4j-deeplearning-iris/readme.md diff --git a/DL4J_DeepLearning_Iris/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Iris_Model.java b/dl4j-deeplearning-iris/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Iris_Model.java similarity index 100% rename from DL4J_DeepLearning_Iris/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Iris_Model.java rename to dl4j-deeplearning-iris/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Iris_Model.java diff --git a/DL4J_DeepLearning_Iris/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Model_Inference.java b/dl4j-deeplearning-iris/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Model_Inference.java similarity index 100% rename from DL4J_DeepLearning_Iris/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Model_Inference.java rename to 
dl4j-deeplearning-iris/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Model_Inference.java diff --git a/DL4J_DeepLearning_Iris/src/main/resources/DL4J_Resources/iris.txt b/dl4j-deeplearning-iris/src/main/resources/DL4J_Resources/iris.txt similarity index 100% rename from DL4J_DeepLearning_Iris/src/main/resources/DL4J_Resources/iris.txt rename to dl4j-deeplearning-iris/src/main/resources/DL4J_Resources/iris.txt diff --git a/DL4J_DeepLearning_Iris/src/main/resources/generatedModels/DL4J/DL4J_Iris_Model.zip b/dl4j-deeplearning-iris/src/main/resources/generatedModels/DL4J/DL4J_Iris_Model.zip similarity index 100% rename from DL4J_DeepLearning_Iris/src/main/resources/generatedModels/DL4J/DL4J_Iris_Model.zip rename to dl4j-deeplearning-iris/src/main/resources/generatedModels/DL4J/DL4J_Iris_Model.zip diff --git a/DL4J_DeepLearning_Iris/src/main/resources/log4j.properties b/dl4j-deeplearning-iris/src/main/resources/log4j.properties similarity index 100% rename from DL4J_DeepLearning_Iris/src/main/resources/log4j.properties rename to dl4j-deeplearning-iris/src/main/resources/log4j.properties diff --git a/DL4J_DeepLearning_Iris/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java b/dl4j-deeplearning-iris/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java similarity index 100% rename from DL4J_DeepLearning_Iris/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java rename to dl4j-deeplearning-iris/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java diff --git a/DL4J_DeepLearning_Iris/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java b/dl4j-deeplearning-iris/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java similarity index 100% rename from 
DL4J_DeepLearning_Iris/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java rename to dl4j-deeplearning-iris/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java diff --git a/DL4J_DeepLearning_Iris/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java b/dl4j-deeplearning-iris/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java similarity index 100% rename from DL4J_DeepLearning_Iris/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java rename to dl4j-deeplearning-iris/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java diff --git a/H2O_DeepLearning/pom.xml b/h20-deeplearning/pom.xml similarity index 100% rename from H2O_DeepLearning/pom.xml rename to h20-deeplearning/pom.xml diff --git a/H2O_DeepLearning/readme.md b/h20-deeplearning/readme.md similarity index 100% rename from H2O_DeepLearning/readme.md rename to h20-deeplearning/readme.md diff --git a/H2O_DeepLearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java b/h20-deeplearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java similarity index 100% rename from H2O_DeepLearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java rename to h20-deeplearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java diff --git 
a/H2O_DeepLearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java b/h20-deeplearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java similarity index 100% rename from H2O_DeepLearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java rename to h20-deeplearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java diff --git a/H2O_DeepLearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example_Refactored.java b/h20-deeplearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example_Refactored.java similarity index 100% rename from H2O_DeepLearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example_Refactored.java rename to h20-deeplearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example_Refactored.java diff --git a/H2O_DeepLearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java b/h20-deeplearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java similarity index 100% rename from H2O_DeepLearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java rename to h20-deeplearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java diff --git a/H2O_DeepLearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/gbm_pojo_test.java 
b/h20-deeplearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/gbm_pojo_test.java similarity index 100% rename from H2O_DeepLearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/gbm_pojo_test.java rename to h20-deeplearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/gbm_pojo_test.java diff --git a/H2O_DeepLearning/src/main/resources/generatedModels/DeepWater_model_python_1503570558230_1.zip b/h20-deeplearning/src/main/resources/generatedModels/DeepWater_model_python_1503570558230_1.zip similarity index 100% rename from H2O_DeepLearning/src/main/resources/generatedModels/DeepWater_model_python_1503570558230_1.zip rename to h20-deeplearning/src/main/resources/generatedModels/DeepWater_model_python_1503570558230_1.zip diff --git a/H2O_DeepLearning/src/main/resources/generatedModels/GBM_model_python_1503397740678_1.zip b/h20-deeplearning/src/main/resources/generatedModels/GBM_model_python_1503397740678_1.zip similarity index 100% rename from H2O_DeepLearning/src/main/resources/generatedModels/GBM_model_python_1503397740678_1.zip rename to h20-deeplearning/src/main/resources/generatedModels/GBM_model_python_1503397740678_1.zip diff --git a/H2O_DeepLearning/src/main/resources/log4j.properties b/h20-deeplearning/src/main/resources/log4j.properties similarity index 100% rename from H2O_DeepLearning/src/main/resources/log4j.properties rename to h20-deeplearning/src/main/resources/log4j.properties diff --git a/H2O_DeepLearning/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java b/h20-deeplearning/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java similarity index 100% rename from H2O_DeepLearning/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java rename to 
h20-deeplearning/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java diff --git a/H2O_DeepLearning/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java b/h20-deeplearning/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java similarity index 100% rename from H2O_DeepLearning/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java rename to h20-deeplearning/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java diff --git a/H2O_DeepLearning/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java b/h20-deeplearning/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java similarity index 100% rename from H2O_DeepLearning/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java rename to h20-deeplearning/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java diff --git a/H2O_DeepLearning/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java b/h20-deeplearning/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java similarity index 100% rename from H2O_DeepLearning/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java rename to 
h20-deeplearning/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java diff --git a/pom.xml b/pom.xml index 3da6f68..88d0030 100644 --- a/pom.xml +++ b/pom.xml @@ -6,175 +6,16 @@ com.github.kaiwaehner.kafka.streams.machinelearning kafka-streams-machine-learning-examples CP51_AK21 - - - - confluent - http://packages.confluent.io/maven/ - - - - - 1.8 - 2.1.0 - 2.11 - ${kafka.scala.version}.8 - 5.1.0 - UTF-8 - - - - - - - - org.apache.kafka - kafka-streams - ${kafka.version} - - - - - - - - org.nd4j - nd4j-native-platform - 1.0.0-beta3 - - - - - - org.deeplearning4j - deeplearning4j-core - 1.0.0-beta3 - - - - org.deeplearning4j - deeplearning4j-modelimport - 1.0.0-beta3 - - - - - - org.tensorflow - tensorflow - 1.3.0 - - - - - - ai.h2o - h2o-genmodel - 3.14.0.1 - - - - - - junit - junit - 4.12 - test - - - org.assertj - assertj-core - 3.3.0 - test - - - org.apache.kafka - kafka_${kafka.scala.version} - ${kafka.version} - test - test - - - org.apache.kafka - kafka-clients - ${kafka.version} - test - test - - - org.apache.kafka - kafka-streams - ${kafka.version} - test - test - - - org.apache.curator - curator-test - 2.9.0 - test - - - io.confluent - kafka-schema-registry - ${confluent.version} - test - - - io.confluent - kafka-schema-registry - ${confluent.version} - - tests - test - - - - - - - - org.apache.maven.plugins - maven-compiler-plugin - 3.6.1 - - 1.8 - 1.8 - - - - - - org.apache.maven.plugins - maven-assembly-plugin - 2.5.2 - - - jar-with-dependencies - - - - true - com.github.megachucky.kafka.streams.machinelearning.StreamsStarterApp - - - - - - assemble-all - package - - single - - - - - - + pom + + + + h20-deeplearning + + tensorflow-image-recognition + + dl4j-deeplearning-iris + + tensorflow-keras + diff --git a/TensorFlow_Image_Recognition/pom.xml b/tensorflow-image-recognition/pom.xml similarity index 100% rename from TensorFlow_Image_Recognition/pom.xml rename to 
tensorflow-image-recognition/pom.xml diff --git a/TensorFlow_Image_Recognition/readme.md b/tensorflow-image-recognition/readme.md similarity index 100% rename from TensorFlow_Image_Recognition/readme.md rename to tensorflow-image-recognition/readme.md diff --git a/TensorFlow_Image_Recognition/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_Example.java b/tensorflow-image-recognition/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_Example.java similarity index 100% rename from TensorFlow_Image_Recognition/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_Example.java rename to tensorflow-image-recognition/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_Example.java diff --git a/TensorFlow_Image_Recognition/src/main/resources/TensorFlow_Images/devil.png b/tensorflow-image-recognition/src/main/resources/TensorFlow_Images/devil.png similarity index 100% rename from TensorFlow_Image_Recognition/src/main/resources/TensorFlow_Images/devil.png rename to tensorflow-image-recognition/src/main/resources/TensorFlow_Images/devil.png diff --git a/TensorFlow_Image_Recognition/src/main/resources/TensorFlow_Images/new_airplane.jpg b/tensorflow-image-recognition/src/main/resources/TensorFlow_Images/new_airplane.jpg similarity index 100% rename from TensorFlow_Image_Recognition/src/main/resources/TensorFlow_Images/new_airplane.jpg rename to tensorflow-image-recognition/src/main/resources/TensorFlow_Images/new_airplane.jpg diff --git a/TensorFlow_Image_Recognition/src/main/resources/TensorFlow_Images/trained_airplane_1.jpg b/tensorflow-image-recognition/src/main/resources/TensorFlow_Images/trained_airplane_1.jpg similarity index 100% rename from TensorFlow_Image_Recognition/src/main/resources/TensorFlow_Images/trained_airplane_1.jpg rename to 
tensorflow-image-recognition/src/main/resources/TensorFlow_Images/trained_airplane_1.jpg diff --git a/TensorFlow_Image_Recognition/src/main/resources/TensorFlow_Images/trained_airplane_2.jpg b/tensorflow-image-recognition/src/main/resources/TensorFlow_Images/trained_airplane_2.jpg similarity index 100% rename from TensorFlow_Image_Recognition/src/main/resources/TensorFlow_Images/trained_airplane_2.jpg rename to tensorflow-image-recognition/src/main/resources/TensorFlow_Images/trained_airplane_2.jpg diff --git a/TensorFlow_Image_Recognition/src/main/resources/TensorFlow_Images/trained_butterfly.jpg b/tensorflow-image-recognition/src/main/resources/TensorFlow_Images/trained_butterfly.jpg similarity index 100% rename from TensorFlow_Image_Recognition/src/main/resources/TensorFlow_Images/trained_butterfly.jpg rename to tensorflow-image-recognition/src/main/resources/TensorFlow_Images/trained_butterfly.jpg diff --git a/TensorFlow_Image_Recognition/src/main/resources/generatedModels/CNN_inception5h/LICENSE b/tensorflow-image-recognition/src/main/resources/generatedModels/CNN_inception5h/LICENSE similarity index 100% rename from TensorFlow_Image_Recognition/src/main/resources/generatedModels/CNN_inception5h/LICENSE rename to tensorflow-image-recognition/src/main/resources/generatedModels/CNN_inception5h/LICENSE diff --git a/TensorFlow_Image_Recognition/src/main/resources/generatedModels/CNN_inception5h/imagenet_comp_graph_label_strings.txt b/tensorflow-image-recognition/src/main/resources/generatedModels/CNN_inception5h/imagenet_comp_graph_label_strings.txt similarity index 100% rename from TensorFlow_Image_Recognition/src/main/resources/generatedModels/CNN_inception5h/imagenet_comp_graph_label_strings.txt rename to tensorflow-image-recognition/src/main/resources/generatedModels/CNN_inception5h/imagenet_comp_graph_label_strings.txt diff --git a/TensorFlow_Image_Recognition/src/main/resources/generatedModels/CNN_inception5h/tensorflow_inception_graph.pb 
b/tensorflow-image-recognition/src/main/resources/generatedModels/CNN_inception5h/tensorflow_inception_graph.pb similarity index 100% rename from TensorFlow_Image_Recognition/src/main/resources/generatedModels/CNN_inception5h/tensorflow_inception_graph.pb rename to tensorflow-image-recognition/src/main/resources/generatedModels/CNN_inception5h/tensorflow_inception_graph.pb diff --git a/TensorFlow_Image_Recognition/src/main/resources/generatedModels/TensorFlow_Census/saved_model.pb b/tensorflow-image-recognition/src/main/resources/generatedModels/TensorFlow_Census/saved_model.pb similarity index 100% rename from TensorFlow_Image_Recognition/src/main/resources/generatedModels/TensorFlow_Census/saved_model.pb rename to tensorflow-image-recognition/src/main/resources/generatedModels/TensorFlow_Census/saved_model.pb diff --git a/TensorFlow_Image_Recognition/src/main/resources/generatedModels/TensorFlow_Census/test.json b/tensorflow-image-recognition/src/main/resources/generatedModels/TensorFlow_Census/test.json similarity index 100% rename from TensorFlow_Image_Recognition/src/main/resources/generatedModels/TensorFlow_Census/test.json rename to tensorflow-image-recognition/src/main/resources/generatedModels/TensorFlow_Census/test.json diff --git a/TensorFlow_Image_Recognition/src/main/resources/generatedModels/TensorFlow_Census/variables/variables.data-00000-of-00001 b/tensorflow-image-recognition/src/main/resources/generatedModels/TensorFlow_Census/variables/variables.data-00000-of-00001 similarity index 100% rename from TensorFlow_Image_Recognition/src/main/resources/generatedModels/TensorFlow_Census/variables/variables.data-00000-of-00001 rename to tensorflow-image-recognition/src/main/resources/generatedModels/TensorFlow_Census/variables/variables.data-00000-of-00001 diff --git a/TensorFlow_Image_Recognition/src/main/resources/generatedModels/TensorFlow_Census/variables/variables.index 
b/tensorflow-image-recognition/src/main/resources/generatedModels/TensorFlow_Census/variables/variables.index similarity index 100% rename from TensorFlow_Image_Recognition/src/main/resources/generatedModels/TensorFlow_Census/variables/variables.index rename to tensorflow-image-recognition/src/main/resources/generatedModels/TensorFlow_Census/variables/variables.index diff --git a/TensorFlow_Image_Recognition/src/main/resources/log4j.properties b/tensorflow-image-recognition/src/main/resources/log4j.properties similarity index 100% rename from TensorFlow_Image_Recognition/src/main/resources/log4j.properties rename to tensorflow-image-recognition/src/main/resources/log4j.properties diff --git a/TensorFlow_Image_Recognition/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java b/tensorflow-image-recognition/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java similarity index 100% rename from TensorFlow_Image_Recognition/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java rename to tensorflow-image-recognition/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java diff --git a/TensorFlow_Image_Recognition/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java b/tensorflow-image-recognition/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java similarity index 100% rename from TensorFlow_Image_Recognition/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java rename to tensorflow-image-recognition/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java diff --git a/TensorFlow_Image_Recognition/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java 
b/tensorflow-image-recognition/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java similarity index 100% rename from TensorFlow_Image_Recognition/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java rename to tensorflow-image-recognition/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java diff --git a/TensorFlow_Keras/pom.xml b/tensorflow-keras/pom.xml similarity index 100% rename from TensorFlow_Keras/pom.xml rename to tensorflow-keras/pom.xml diff --git a/TensorFlow_Keras/readme.md b/tensorflow-keras/readme.md similarity index 100% rename from TensorFlow_Keras/readme.md rename to tensorflow-keras/readme.md diff --git a/TensorFlow_Keras/src/main/resources/generatedModels/Keras/keras-model-script.py b/tensorflow-keras/src/main/resources/generatedModels/Keras/keras-model-script.py similarity index 100% rename from TensorFlow_Keras/src/main/resources/generatedModels/Keras/keras-model-script.py rename to tensorflow-keras/src/main/resources/generatedModels/Keras/keras-model-script.py diff --git a/TensorFlow_Keras/src/main/resources/generatedModels/Keras/simple_mlp.h5 b/tensorflow-keras/src/main/resources/generatedModels/Keras/simple_mlp.h5 similarity index 100% rename from TensorFlow_Keras/src/main/resources/generatedModels/Keras/simple_mlp.h5 rename to tensorflow-keras/src/main/resources/generatedModels/Keras/simple_mlp.h5 diff --git a/TensorFlow_Keras/src/main/resources/log4j.properties b/tensorflow-keras/src/main/resources/log4j.properties similarity index 100% rename from TensorFlow_Keras/src/main/resources/log4j.properties rename to tensorflow-keras/src/main/resources/log4j.properties diff --git 
a/TensorFlow_Keras/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java b/tensorflow-keras/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java similarity index 100% rename from TensorFlow_Keras/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java rename to tensorflow-keras/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java diff --git a/TensorFlow_Keras/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java b/tensorflow-keras/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java similarity index 100% rename from TensorFlow_Keras/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java rename to tensorflow-keras/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java diff --git a/TensorFlow_Keras/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java b/tensorflow-keras/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java similarity index 100% rename from TensorFlow_Keras/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java rename to tensorflow-keras/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java From 1c068704b7cd85b8b28347ed8250592a10eb4064 Mon Sep 17 00:00:00 2001 From: Jukka Karvanen <48978068+jukkakarvanen@users.noreply.github.com> Date: Fri, 29 Mar 2019 08:03:22 +0200 Subject: [PATCH 05/20] Pom dependency cleanup --- dl4j-deeplearning-iris/pom.xml | 19 ++------------ h20-deeplearning/pom.xml | 37 
--------------------------- tensorflow-image-recognition/pom.xml | 38 ---------------------------- tensorflow-keras/pom.xml | 10 ++------ 4 files changed, 4 insertions(+), 100 deletions(-) diff --git a/dl4j-deeplearning-iris/pom.xml b/dl4j-deeplearning-iris/pom.xml index 7dfb917..86e3ca0 100644 --- a/dl4j-deeplearning-iris/pom.xml +++ b/dl4j-deeplearning-iris/pom.xml @@ -63,23 +63,6 @@ 1.0.0-beta3 - - - - org.tensorflow - tensorflow - 1.3.0 - - - - - - ai.h2o - h2o-genmodel - 3.14.0.1 - - - junit @@ -150,6 +133,7 @@ + diff --git a/h20-deeplearning/pom.xml b/h20-deeplearning/pom.xml index 35a96f9..333ba3f 100644 --- a/h20-deeplearning/pom.xml +++ b/h20-deeplearning/pom.xml @@ -35,43 +35,6 @@ ${kafka.version} - - - - - - org.nd4j - nd4j-native-platform - 1.0.0-beta3 - - - - - - org.deeplearning4j - deeplearning4j-core - 1.0.0-beta3 - - - - org.deeplearning4j - deeplearning4j-modelimport - 1.0.0-beta3 - - - - - - org.tensorflow - tensorflow - 1.3.0 - - - ai.h2o diff --git a/tensorflow-image-recognition/pom.xml b/tensorflow-image-recognition/pom.xml index b2c6659..cd8d6ae 100644 --- a/tensorflow-image-recognition/pom.xml +++ b/tensorflow-image-recognition/pom.xml @@ -35,35 +35,6 @@ ${kafka.version} - - - - - - org.nd4j - nd4j-native-platform - 1.0.0-beta3 - - - - - - org.deeplearning4j - deeplearning4j-core - 1.0.0-beta3 - - - - org.deeplearning4j - deeplearning4j-modelimport - 1.0.0-beta3 - - - org.tensorflow @@ -71,15 +42,6 @@ 1.3.0 - - - - ai.h2o - h2o-genmodel - 3.14.0.1 - - - junit diff --git a/tensorflow-keras/pom.xml b/tensorflow-keras/pom.xml index 501a7df..d5d198c 100644 --- a/tensorflow-keras/pom.xml +++ b/tensorflow-keras/pom.xml @@ -70,14 +70,6 @@ tensorflow 1.3.0 - - - - - ai.h2o - h2o-genmodel - 3.14.0.1 - @@ -150,6 +142,7 @@ + From 78824ec42bdf0d31042e95b3195f12ce05c0a913 Mon Sep 17 00:00:00 2001 From: Jukka Karvanen <48978068+jukkakarvanen@users.noreply.github.com> Date: Fri, 29 Mar 2019 10:03:05 +0200 Subject: [PATCH 06/20] UnitTest with 
TopologyTestDriver added, Extracted topology and config methods to able to use those in tests --- tensorflow-image-recognition/pom.xml | 7 ++ ..._TensorFlow_Image_Recognition_Example.java | 63 +++++++++------- ...sorFlow_Image_Recognition_ExampleTest.java | 71 +++++++++++++++++++ 3 files changed, 115 insertions(+), 26 deletions(-) create mode 100644 tensorflow-image-recognition/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_ExampleTest.java diff --git a/tensorflow-image-recognition/pom.xml b/tensorflow-image-recognition/pom.xml index cd8d6ae..a188719 100644 --- a/tensorflow-image-recognition/pom.xml +++ b/tensorflow-image-recognition/pom.xml @@ -43,6 +43,13 @@ + + org.apache.kafka + kafka-streams-test-utils + ${kafka.version} + test + + junit junit diff --git a/tensorflow-image-recognition/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_Example.java b/tensorflow-image-recognition/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_Example.java index 42206b9..ca612ea 100644 --- a/tensorflow-image-recognition/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_Example.java +++ b/tensorflow-image-recognition/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_Example.java @@ -13,6 +13,7 @@ import org.apache.kafka.streams.KafkaStreams; import org.apache.kafka.streams.StreamsBuilder; import org.apache.kafka.streams.StreamsConfig; +import org.apache.kafka.streams.Topology; import org.apache.kafka.streams.kstream.KStream; import org.tensorflow.DataType; import org.tensorflow.Graph; @@ -31,27 +32,38 @@ */ public class Kafka_Streams_TensorFlow_Image_Recognition_Example { - private static final String imageInputTopic = "ImageInputTopic"; - private static final String imageOutputTopic = 
"ImageOutputTopic"; + static final String imageInputTopic = "ImageInputTopic"; + static final String imageOutputTopic = "ImageOutputTopic"; // Prediction Value private static String imageClassification = "unknown"; private static String imageProbability = "unknown"; public static void main(final String[] args) throws Exception { + // Configure Kafka Streams Application + final String bootstrapServers = args.length > 0 ? args[0] : "localhost:9092"; + final Properties streamsConfiguration = getStreamConfiguration(bootstrapServers); + Topology topology = getStreamTopology(); - // Create TensorFlow object + // Start Kafka Streams Application to process new incoming images from the Input + // Topic + final KafkaStreams streams = new KafkaStreams(topology, streamsConfiguration); - String modelDir = "src/main/resources/generatedModels/CNN_inception5h"; + streams.cleanUp(); - Path pathGraph = Paths.get(modelDir, "tensorflow_inception_graph.pb"); - byte[] graphDef = Files.readAllBytes(pathGraph); + streams.start(); - Path pathModel = Paths.get(modelDir, "imagenet_comp_graph_label_strings.txt"); - List labels = Files.readAllLines(pathModel, Charset.forName("UTF-8")); + System.out.println("Image Recognition Microservice is running..."); - // Configure Kafka Streams Application - final String bootstrapServers = args.length > 0 ? args[0] : "localhost:9092"; + System.out.println("Input to Kafka Topic " + imageInputTopic + "; Output to Kafka Topic " + imageOutputTopic); + + // Add shutdown hook to respond to SIGTERM and gracefully close Kafka + // Streams + Runtime.getRuntime().addShutdownHook(new Thread(streams::close)); + + } + + static Properties getStreamConfiguration(String bootstrapServers) { final Properties streamsConfiguration = new Properties(); // Give the Streams application a unique name. The name must be unique // in the Kafka cluster @@ -65,6 +77,19 @@ public static void main(final String[] args) throws Exception { // values. 
streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); + return streamsConfiguration; + } + + static Topology getStreamTopology() throws IOException { + // Create TensorFlow object + + String modelDir = "src/main/resources/generatedModels/CNN_inception5h"; + + Path pathGraph = Paths.get(modelDir, "tensorflow_inception_graph.pb"); + byte[] graphDef = Files.readAllBytes(pathGraph); + + Path pathModel = Paths.get(modelDir, "imagenet_comp_graph_label_strings.txt"); + List labels = Files.readAllLines(pathModel, Charset.forName("UTF-8")); // In the subsequent lines we define the processing topology of the // Streams application. @@ -116,24 +141,10 @@ public static void main(final String[] args) throws Exception { // Send prediction information to Output Topic transformedMessage.to(imageOutputTopic); - // Start Kafka Streams Application to process new incoming images from the Input - // Topic - final KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration); - - streams.cleanUp(); - - streams.start(); - - System.out.println("Image Recognition Microservice is running..."); - - System.out.println("Input to Kafka Topic " + imageInputTopic + "; Output to Kafka Topic " + imageOutputTopic); - - // Add shutdown hook to respond to SIGTERM and gracefully close Kafka - // Streams - Runtime.getRuntime().addShutdownHook(new Thread(streams::close)); - + return builder.build(); } + // ######################################################################################## // Private helper class for construction and execution of the pre-built // TensorFlow model diff --git a/tensorflow-image-recognition/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_ExampleTest.java 
b/tensorflow-image-recognition/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_ExampleTest.java new file mode 100644 index 0000000..88e8370 --- /dev/null +++ b/tensorflow-image-recognition/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_ExampleTest.java @@ -0,0 +1,71 @@ +package com.github.megachucky.kafka.streams.machinelearning; + +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.common.serialization.StringDeserializer; +import org.apache.kafka.common.serialization.StringSerializer; +import org.apache.kafka.streams.KeyValue; +import org.apache.kafka.streams.TopologyTestDriver; +import org.apache.kafka.streams.test.ConsumerRecordFactory; +import org.apache.kafka.streams.test.OutputVerifier; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +import static org.assertj.core.api.AssertionsForInterfaceTypes.assertThat; + +public class Kafka_Streams_TensorFlow_Image_Recognition_ExampleTest { + private TopologyTestDriver testDriver; + + private StringDeserializer stringDeserializer = new StringDeserializer(); + private ConsumerRecordFactory recordFactory = new ConsumerRecordFactory<>(new StringSerializer(), new StringSerializer()); + + @Before + public void setup() throws IOException { + testDriver = new TopologyTestDriver(Kafka_Streams_TensorFlow_Image_Recognition_Example.getStreamTopology(), Kafka_Streams_TensorFlow_Image_Recognition_Example.getStreamConfiguration("localhost:9092")); + } + + @After + public void tearDown() { + try { + testDriver.close(); + } catch (RuntimeException e) { + // https://issues.apache.org/jira/browse/KAFKA-6647 causes exception when executed in Windows, ignoring it + // Logged stacktrace cannot be avoided + System.out.println("Ignoring exception, 
test failing in Windows due this exception:" + e.getLocalizedMessage()); + } + } + + private String getOutput() { + ProducerRecord<String, String> output = testDriver.readOutput(Kafka_Streams_TensorFlow_Image_Recognition_Example.imageOutputTopic, stringDeserializer, stringDeserializer); + assertThat(output).isNotNull(); + return output.value(); + } + + @Test + public void testOne() { + testDriver.pipeInput(recordFactory.create(Kafka_Streams_TensorFlow_Image_Recognition_Example.imageInputTopic, null, "src/main/resources/TensorFlow_Images/new_airplane.jpg", 1L)); + assertThat(getOutput()).contains("What is the content of this picture? => airliner"); + } + + @Test + public void testList() { + // Images: 'unknown', Airliner, 'unknown', Butterfly + List<String> inputValues = Arrays.asList("src/main/resources/TensorFlow_Images/trained_airplane_2.jpg", + "src/main/resources/TensorFlow_Images/devil.png", + "src/main/resources/TensorFlow_Images/trained_butterfly.jpg"); + List<KeyValue<String, String>> records = inputValues.stream().map(v -> new KeyValue<String, String>(null, v)).collect(Collectors.toList()); + + + testDriver.pipeInput(recordFactory.create(Kafka_Streams_TensorFlow_Image_Recognition_Example.imageInputTopic, records, 1L, 100L)); + assertThat(getOutput()).contains("What is the content of this picture? => airliner"); + assertThat(getOutput()).doesNotContain("What is the content of this picture? => airliner"); + assertThat(getOutput()).contains("What is the content of this picture? 
=> cabbage butterfly"); + } + + +} From ef12861ed3f10ce84d395c059f12d1d42ef2cf1b Mon Sep 17 00:00:00 2001 From: Jukka Karvanen <48978068+jukkakarvanen@users.noreply.github.com> Date: Fri, 29 Mar 2019 11:52:40 +0200 Subject: [PATCH 07/20] Cleanup --- ...TensorFlow_Image_Recognition_ExampleTest.java | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tensorflow-image-recognition/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_ExampleTest.java b/tensorflow-image-recognition/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_ExampleTest.java index 88e8370..65f4900 100644 --- a/tensorflow-image-recognition/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_ExampleTest.java +++ b/tensorflow-image-recognition/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_ExampleTest.java @@ -6,7 +6,6 @@ import org.apache.kafka.streams.KeyValue; import org.apache.kafka.streams.TopologyTestDriver; import org.apache.kafka.streams.test.ConsumerRecordFactory; -import org.apache.kafka.streams.test.OutputVerifier; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -18,6 +17,16 @@ import static org.assertj.core.api.AssertionsForInterfaceTypes.assertThat; +/** + * TopologyTestDriver based test about stream processing of Kafka_Streams_TensorFlow_Image_Recognition_Example. + * + * @author Jukka Karvanen / jukinimi.com + * * Unit Test of + * {@link Kafka_Streams_TensorFlow_Image_Recognition_Example}, using an + * TopologyTestDriver and a TensorFlow CNN model. 
+ * + */ + public class Kafka_Streams_TensorFlow_Image_Recognition_ExampleTest { private TopologyTestDriver testDriver; @@ -46,12 +55,17 @@ private String getOutput() { return output.value(); } + /** Simple recognition test validating only the recognition part of the output + */ @Test public void testOne() { testDriver.pipeInput(recordFactory.create(Kafka_Streams_TensorFlow_Image_Recognition_Example.imageInputTopic, null, "src/main/resources/TensorFlow_Images/new_airplane.jpg", 1L)); assertThat(getOutput()).contains("What is the content of this picture? => airliner"); } + /** Test based on Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest + * + */ @Test public void testList() { // Images: 'unknown', Airliner, 'unknown', Butterfly From 71cea3c9bc865e16a1abd3c87f2102c7a15f5e71 Mon Sep 17 00:00:00 2001 From: Jukka Karvanen <48978068+jukkakarvanen@users.noreply.github.com> Date: Fri, 29 Mar 2019 12:44:09 +0200 Subject: [PATCH 08/20] Renamed h20-gbm example folder --- {h20-deeplearning => h20-gbm}/pom.xml | 2 +- {h20-deeplearning => h20-gbm}/readme.md | 0 ...ms_MachineLearning_H2O_DeepLearning_Example.java | 0 ...fka_Streams_MachineLearning_H2O_GBM_Example.java | 0 ..._MachineLearning_H2O_GBM_Example_Refactored.java | 0 ...arning_fe7c1f02_08ec_4070_b784_c2531147e451.java | 0 .../machinelearning/models/gbm_pojo_test.java | 0 .../DeepWater_model_python_1503570558230_1.zip | Bin .../GBM_model_python_1503397740678_1.zip | Bin .../src/main/resources/log4j.properties | 0 .../integration/utils/TestEmbeddedKafkaCluster.java | 0 .../streams/integration/utils/TestKafkaStreams.java | 0 ...ng_H2O_DeepLearning_Example_IntegrationTest.java | 0 ...ineLearning_H2O_GBM_Example_IntegrationTest.java | 0 pom.xml | 2 +- 15 files changed, 2 insertions(+), 2 deletions(-) rename {h20-deeplearning => h20-gbm}/pom.xml (98%) rename {h20-deeplearning => h20-gbm}/readme.md (100%) rename {h20-deeplearning => 
h20-gbm}/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java (100%) rename {h20-deeplearning => h20-gbm}/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java (100%) rename {h20-deeplearning => h20-gbm}/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example_Refactored.java (100%) rename {h20-deeplearning => h20-gbm}/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java (100%) rename {h20-deeplearning => h20-gbm}/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/gbm_pojo_test.java (100%) rename {h20-deeplearning => h20-gbm}/src/main/resources/generatedModels/DeepWater_model_python_1503570558230_1.zip (100%) rename {h20-deeplearning => h20-gbm}/src/main/resources/generatedModels/GBM_model_python_1503397740678_1.zip (100%) rename {h20-deeplearning => h20-gbm}/src/main/resources/log4j.properties (100%) rename {h20-deeplearning => h20-gbm}/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java (100%) rename {h20-deeplearning => h20-gbm}/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java (100%) rename {h20-deeplearning => h20-gbm}/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java (100%) rename {h20-deeplearning => h20-gbm}/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java (100%) diff --git a/h20-deeplearning/pom.xml b/h20-gbm/pom.xml similarity index 98% rename from h20-deeplearning/pom.xml rename to h20-gbm/pom.xml index 333ba3f..33f682f 100644 --- a/h20-deeplearning/pom.xml +++ b/h20-gbm/pom.xml @@ -4,7 +4,7 @@ 
4.0.0 com.github.kaiwaehner.kafka.streams.machinelearning - h20-deeplearning + h20-gbm CP51_AK21 diff --git a/h20-deeplearning/readme.md b/h20-gbm/readme.md similarity index 100% rename from h20-deeplearning/readme.md rename to h20-gbm/readme.md diff --git a/h20-deeplearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java b/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java similarity index 100% rename from h20-deeplearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java rename to h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java diff --git a/h20-deeplearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java b/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java similarity index 100% rename from h20-deeplearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java rename to h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java diff --git a/h20-deeplearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example_Refactored.java b/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example_Refactored.java similarity index 100% rename from h20-deeplearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example_Refactored.java rename to 
h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example_Refactored.java diff --git a/h20-deeplearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java b/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java similarity index 100% rename from h20-deeplearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java rename to h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java diff --git a/h20-deeplearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/gbm_pojo_test.java b/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/gbm_pojo_test.java similarity index 100% rename from h20-deeplearning/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/gbm_pojo_test.java rename to h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/gbm_pojo_test.java diff --git a/h20-deeplearning/src/main/resources/generatedModels/DeepWater_model_python_1503570558230_1.zip b/h20-gbm/src/main/resources/generatedModels/DeepWater_model_python_1503570558230_1.zip similarity index 100% rename from h20-deeplearning/src/main/resources/generatedModels/DeepWater_model_python_1503570558230_1.zip rename to h20-gbm/src/main/resources/generatedModels/DeepWater_model_python_1503570558230_1.zip diff --git a/h20-deeplearning/src/main/resources/generatedModels/GBM_model_python_1503397740678_1.zip b/h20-gbm/src/main/resources/generatedModels/GBM_model_python_1503397740678_1.zip similarity index 100% rename from h20-deeplearning/src/main/resources/generatedModels/GBM_model_python_1503397740678_1.zip rename to 
h20-gbm/src/main/resources/generatedModels/GBM_model_python_1503397740678_1.zip diff --git a/h20-deeplearning/src/main/resources/log4j.properties b/h20-gbm/src/main/resources/log4j.properties similarity index 100% rename from h20-deeplearning/src/main/resources/log4j.properties rename to h20-gbm/src/main/resources/log4j.properties diff --git a/h20-deeplearning/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java b/h20-gbm/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java similarity index 100% rename from h20-deeplearning/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java rename to h20-gbm/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java diff --git a/h20-deeplearning/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java b/h20-gbm/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java similarity index 100% rename from h20-deeplearning/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java rename to h20-gbm/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java diff --git a/h20-deeplearning/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java b/h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java similarity index 100% rename from h20-deeplearning/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java rename to 
h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java diff --git a/h20-deeplearning/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java b/h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java similarity index 100% rename from h20-deeplearning/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java rename to h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java diff --git a/pom.xml b/pom.xml index 88d0030..3da61e0 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ - h20-deeplearning + h20-gbm tensorflow-image-recognition From f4efb33fedfdeefa01e824d1352b5c5054700cab Mon Sep 17 00:00:00 2001 From: Jukka Karvanen <48978068+jukkakarvanen@users.noreply.github.com> Date: Fri, 29 Mar 2019 12:48:38 +0200 Subject: [PATCH 09/20] Proposal for Readme split --- dl4j-deeplearning-iris/readme.md | 152 +------------------------ h20-gbm/readme.md | 113 +----------------- readme.md | 98 +--------------- tensorflow-image-recognition/readme.md | 145 +---------------------- tensorflow-keras/readme.md | 142 +---------------------- 5 files changed, 10 insertions(+), 640 deletions(-) diff --git a/dl4j-deeplearning-iris/readme.md b/dl4j-deeplearning-iris/readme.md index e684ab6..f0c9543 100644 --- a/dl4j-deeplearning-iris/readme.md +++ b/dl4j-deeplearning-iris/readme.md @@ -1,132 +1,6 @@ # Machine Learning + Kafka Streams Examples -This project contains **examples which demonstrate how to deploy analytic models to mission-critical, scalable production leveraging [Apache Kafka](https://kafka.apache.org/) and its [Streams 
API](docs.confluent.io/current/streams/index.html).** -Examples will include analytic models built with TensorFlow, Keras, H2O, Python, DeepLearning4J and other technologies. - -![Kafka Open Source Ecosystem for a Scalable Mission Critical Machine Learning Infrastructure](http://www.kai-waehner.de/blog/wp-content/uploads/2017/10/Apache_Kafka_Ecosystem_Kafka_Streams_Machine_Learning.png "Kafka Open Source Ecosystem for a Scalable Mission Critical Machine Learning Infrastructure") - - -## Material (Blogs Posts, Slides, Videos) -Here is some material about this topic if you want to read and listen to the theory instead of just doing hands-on: -- Blog Post: [How to Build and Deploy Scalable Machine Learning in Production with Apache Kafka](https://www.confluent.io/blog/build-deploy-scalable-machine-learning-production-apache-kafka/) -- Slide Deck: [Apache Kafka + Machine Learning => Intelligent Real Time Applications](https://www.slideshare.net/KaiWaehner/apache-kafka-streams-machine-learning-deep-learning) -- Slide Deck: [Deep Learning at Extreme Scale (in the Cloud) 
with the Apache Kafka Open Source Ecosystem](https://www.slideshare.net/KaiWaehner/deep-learning-at-extreme-scale-in-the-cloud-with-the-apache-kafka-open-source-ecosystem) -- Video Recording: [Deep Learning in Mission Critical and Scalable Real Time Applications with Open Source Frameworks](https://vimeo.com/jaxtv/review/256406763/7fbf4213be) -- Blog Post: [Using Apache Kafka to Drive Cutting-Edge Machine Learning - Hybrid ML Architectures, AutoML, and more...](https://www.confluent.io/blog/using-apache-kafka-drive-cutting-edge-machine-learning) -- Blog Post: [Machine Learning with Python, Jupyter, KSQL and TensorFlow](https://www.confluent.io/blog/machine-learning-with-python-jupyter-ksql-tensorflow) - -## Use Cases and Technologies - -##### The following examples are already available including unit tests: - -* Deployment of a H2O GBM model to a Kafka Streams application for prediction of flight delays -* Deployment of a H2O Deep Learning model to a Kafka Streams application for prediction of flight delays -* Deployment of a pre-built TensorFlow CNN model for image recognition -* Deployment of a DL4J model to predict the species of Iris flowers -* Deployment of a Keras model (trained with TensorFlow backend) using the Import Model API from DeepLearning4J - -**More sophisticated use cases around Kafka Streams and other technologies will be added over time in this or related Github project. Some ideas**: - -* Image Recognition with H2O and TensorFlow (to show the difference of using H2O instead of using just low level TensorFlow APIs) -* Anomaly Detection with Autoencoders leveraging DeepLearning4J. -* Cross Selling and Customer Churn Detection using classical Machine Learning algorithms but also Deep Learning -* Stateful Stream Processing to combine different model execution steps into a more powerful workflow instead of "just" inferencing single events (a good example might be a streaming process with sliding or session windows). 
-* Keras to build different models with Python, TensorFlow, Theano and other Deep Learning frameworks under the hood + Kafka Streams as generic Machine Learning infrastructure to deploy, execute and monitor these different models. - -##### Some other Github projects exist already with more ML + Kafka content: - -- Deep Learning UDF for KSQL: [Streaming Anomaly Detection of MQTT IoT Sensor Data using an Autoencoder](https://github.com/kaiwaehner/ksql-udf-deep-learning-mqtt-iot) -- End-to-End ML Integration Demo: [Continuous Health Checks with Anomaly Detection using KSQL, Kafka Connect, Deep Learning and Elasticsearch](https://github.com/kaiwaehner/ksql-fork-with-deep-learning-function) -- TensorFlow Serving + gRPC + Kafka Streams on Github => Stream Processing and RPC / Request-Response concepts combined: [Model inference with Apache Kafka, Kafka Streams and a TensorFlow model deployed on a TensorFlow Serving model server](https://github.com/kaiwaehner/tensorflow-serving-java-grpc-kafka-streams) - -## Requirements, Installation and Usage -The code is developed and tested on Mac and Linux operating systems. As Kafka does not support and work well on Windows, this is not tested at all. - -Java 8 and Maven 3 are required. Maven will download all required dependencies. - -Just download the project and run - - mvn clean package - -Apache Kafka 2.1 is currently used. The code is also compatible with Kafka and Kafka Streams 1.1 and 2.0. - -**Please make sure to run the Maven build without any changes first.** If it works without errors, you can change library versions, Java version, etc. and see if it still works or if you need to adjust code. - -Every examples includes an implementation and an unit test. The examples are very simple and lightweight. No further configuration is needed to build and run it. Though, for this reason, the generated models are also included (and increase the download size of the project). 
- -The unit tests use some Kafka helper classes like EmbeddedSingleNodeKafkaCluster in package **com.github.megachucky.kafka.streams.machinelearning.test.utils** so that you can run it without any other configuration or Kafka setup. -If you want to run an implementation of a main class in package **com.github.megachucky.kafka.streams.machinelearning**, you need to start a Kafka cluster (with at least one Zookeeper and one Kafka broker running) and also create the required topics. So check out the unit tests first. - - -### Example 1 - Gradient Boosting with H2O.ai for Prediction of Flight Delays - -**Use Case** - -Gradient Boosting Method (GBM) to predict flight delays. -A H2O generated GBM Java model (POJO) is instantiated and used in a Kafka Streams application to do interference on new events. - -**Machine Learning Technology** - -* [H2O](https://www.h2o.ai) -* Check the [H2O demo](https://github.com/h2oai/h2o-2/wiki/Hacking-Airline-DataSet-with-H2O) to understand the test and and how the model was built -* You can re-use the generated Java model attached to this project ([gbm_pojo_test.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/gbm_pojo_test.java)) or build your own model using R, Python, Flow UI or any other technologies supported by H2O framework. 
- -**Source Code** - -[MachineLearning_H2O_Example.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java) - -**Unit Test** - -[MachineLearning_H2O_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java) - -**Manual Testing** - -You can easily test this by yourself. Here are the steps: -- Start Kafka, e.g. with Confluent CLI: - - confluent start kafka -- Create topics AirlineInputTopic and AirlineOutputTopic - - kafka-topics --zookeeper localhost:2181 --create --topic AirlineInputTopic --partitions 3 --replication-factor 1 - - kafka-topics --zookeeper localhost:2181 --create --topic AirlineOutputTopic --partitions 3 --replication-factor 1 -- Start the Kafka Streams app: - - java -cp target/kafka-streams-machine-learning-examples-1.0-SNAPSHOT-jar-with-dependencies.jar com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_MachineLearning_H2O_GBM_Example -- Send messages, e.g. with kafkacat: - - echo -e "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES" | kafkacat -b localhost:9092 -P -t AirlineInputTopic -- Consume predictions: - - kafka-console-consumer --bootstrap-server localhost:9092 --topic AirlineOutputTopic --from-beginning -- Find more details in the unit test... 
- - -**H2O Deep Learning instead of H2O GBM Model** - -The project includes another example with similar code to use a [H2O Deep Learning model](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java) instead of H2O GBM Model: [Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java) -This shows how you can easily test or replace different analytic models for one use case, or even use them for A/B testing. - -### Example 2 - Convolutional Neural Network (CNN) with TensorFlow for Image Recognition -**Use Case** - -Convolutional Neural Network (CNN) to for image recognition. -A prebuilt TensorFlow CNN model is instantiated and used in a Kafka Streams application to do recognize new JPEG images. A Kafka Input Topic receives the location of a new images (another option would be to send the image in the Kafka message instead of just a link to it), infers the content of the picture via the TensorFlow model, and sends the result to a Kafka Output Topic. - -**Machine Learning Technology** -* [TensorFlow](https://www.tensorflow.org/) -* Leverages [TensorFlow for Java](https://www.tensorflow.org/install/install_java). These APIs are particularly well-suited for loading models created in Python and executing them within a Java application. Please note: The Java API doesn't yet include convenience functions (which you might know from [Keras](https://keras.io/)), thus a private helper class is used in the example for construction and execution of the pre-built TensorFlow model. 
-* Check the official TensorFlow demo [LabelImage](https://github.com/kaiwaehner/tensorflow/blob/r1.3/tensorflow/java/src/main/java/org/tensorflow/examples/LabelImage.java) to understand this image recognition example -* You can re-use the pre-trained TensorFlow model attached to this project [tensorflow_inception_graph.pb](http://arxiv.org/abs/1512.00567) or add your own model. -* The 'images' folder contains models which were used for training the model (trained_airplane_1.jpg, trained_airplane_2.jpg, trained_butterfly.jpg) but also a new picture (new_airplane.jpg) which is not known by the model and using a different resolution than the others. Feel free to add your own pictures (they need to be trained, see list of trained pictures in the file: imagenet_comp_graph_label_strings.txt), otherwise the model will return 'unknown'. - -**Source Code** - -[Kafka_Streams_TensorFlow_Image_Recognition_Example.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_Example.java) - -**Unit Test** - -[Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java) +General info in main [Readme](../readme.md) ### Example 3 - Iris Prediction using a Neural Network with DeepLearning4J (DL4J) **Use Case** @@ -142,27 +16,3 @@ This is a famous example: Prediction of the Iris Species - implemented with many **Unit Test** 
[Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java) -### Example 4 - Python + Keras + TensorFlow + DeepLearning4j - -**Use Case** - -Development of an analytic model trained with Python, Keras and TensorFlow and deployment to Java and Kafka ecosystem. No business case, just a technical demonstration of a simple 'Hello World' Keras model. Feel free to replace the model with any other Keras model trained with your backend of choice. You just need to replace the model binary (and use a model which is compatible with DeepLearning4J 's model importer). - -**Machine Learning Technology** -* [Python](https://www.python.org/) -* [DeepLearning4J](https://deeplearning4j.org) -* [Keras](https://keras.io/) - a high-level neural networks API, written in Python and capable of running on top of TensorFlow, CNTK, or Theano. -* [TensorFlow](https://www.tensorflow.org/) - used as backend under the hood of Keras -* DeepLearning4J 's [KerasModelImport feature](https://deeplearning4j.org/docs/latest/keras-import-overview) is used for importing the Keras / TensorFlow model into Java. The used model is its 'Hello World' model example. -* The Keras model was trained with this [Python script](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/resources/generatedModels/Keras/keras-model-script.py). 
- -**Unit Test** - -[Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java) - - - - - - - diff --git a/h20-gbm/readme.md b/h20-gbm/readme.md index e684ab6..5f20d9f 100644 --- a/h20-gbm/readme.md +++ b/h20-gbm/readme.md @@ -1,62 +1,6 @@ # Machine Learning + Kafka Streams Examples -This project contains **examples which demonstrate how to deploy analytic models to mission-critical, scalable production leveraging [Apache Kafka](https://kafka.apache.org/) and its [Streams API](docs.confluent.io/current/streams/index.html).** -Examples will include analytic models built with TensorFlow, Keras, H2O, Python, DeepLearning4J and other technologies. - -![Kafka Open Source Ecosystem for a Scalable Mission Critical Machine Learning Infrastructure](http://www.kai-waehner.de/blog/wp-content/uploads/2017/10/Apache_Kafka_Ecosystem_Kafka_Streams_Machine_Learning.png "Kafka Open Source Ecosystem for a Scalable Mission Critical Machine Learning Infrastructure") - - -## Material (Blogs Posts, Slides, Videos) -Here is some material about this topic if you want to read and listen to the theory instead of just doing hands-on: -- Blog Post: [How to Build and Deploy Scalable Machine Learning in Production with Apache Kafka](https://www.confluent.io/blog/build-deploy-scalable-machine-learning-production-apache-kafka/) -- Slide Deck: [Apache Kafka + Machine Learning => Intelligent Real Time Applications](https://www.slideshare.net/KaiWaehner/apache-kafka-streams-machine-learning-deep-learning) -- Slide Deck: [Deep Learning at Extreme Scale (in the Cloud) 
with the Apache Kafka Open Source Ecosystem](https://www.slideshare.net/KaiWaehner/deep-learning-at-extreme-scale-in-the-cloud-with-the-apache-kafka-open-source-ecosystem) -- Video Recording: [Deep Learning in Mission Critical and Scalable Real Time Applications with Open Source Frameworks](https://vimeo.com/jaxtv/review/256406763/7fbf4213be) -- Blog Post: [Using Apache Kafka to Drive Cutting-Edge Machine Learning - Hybrid ML Architectures, AutoML, and more...](https://www.confluent.io/blog/using-apache-kafka-drive-cutting-edge-machine-learning) -- Blog Post: [Machine Learning with Python, Jupyter, KSQL and TensorFlow](https://www.confluent.io/blog/machine-learning-with-python-jupyter-ksql-tensorflow) - -## Use Cases and Technologies - -##### The following examples are already available including unit tests: - -* Deployment of a H2O GBM model to a Kafka Streams application for prediction of flight delays -* Deployment of a H2O Deep Learning model to a Kafka Streams application for prediction of flight delays -* Deployment of a pre-built TensorFlow CNN model for image recognition -* Deployment of a DL4J model to predict the species of Iris flowers -* Deployment of a Keras model (trained with TensorFlow backend) using the Import Model API from DeepLearning4J - -**More sophisticated use cases around Kafka Streams and other technologies will be added over time in this or related Github project. Some ideas**: - -* Image Recognition with H2O and TensorFlow (to show the difference of using H2O instead of using just low level TensorFlow APIs) -* Anomaly Detection with Autoencoders leveraging DeepLearning4J. -* Cross Selling and Customer Churn Detection using classical Machine Learning algorithms but also Deep Learning -* Stateful Stream Processing to combine different model execution steps into a more powerful workflow instead of "just" inferencing single events (a good example might be a streaming process with sliding or session windows). 
-* Keras to build different models with Python, TensorFlow, Theano and other Deep Learning frameworks under the hood + Kafka Streams as generic Machine Learning infrastructure to deploy, execute and monitor these different models. - -##### Some other Github projects exist already with more ML + Kafka content: - -- Deep Learning UDF for KSQL: [Streaming Anomaly Detection of MQTT IoT Sensor Data using an Autoencoder](https://github.com/kaiwaehner/ksql-udf-deep-learning-mqtt-iot) -- End-to-End ML Integration Demo: [Continuous Health Checks with Anomaly Detection using KSQL, Kafka Connect, Deep Learning and Elasticsearch](https://github.com/kaiwaehner/ksql-fork-with-deep-learning-function) -- TensorFlow Serving + gRPC + Kafka Streams on Github => Stream Processing and RPC / Request-Response concepts combined: [Model inference with Apache Kafka, Kafka Streams and a TensorFlow model deployed on a TensorFlow Serving model server](https://github.com/kaiwaehner/tensorflow-serving-java-grpc-kafka-streams) - -## Requirements, Installation and Usage -The code is developed and tested on Mac and Linux operating systems. As Kafka does not support and work well on Windows, this is not tested at all. - -Java 8 and Maven 3 are required. Maven will download all required dependencies. - -Just download the project and run - - mvn clean package - -Apache Kafka 2.1 is currently used. The code is also compatible with Kafka and Kafka Streams 1.1 and 2.0. - -**Please make sure to run the Maven build without any changes first.** If it works without errors, you can change library versions, Java version, etc. and see if it still works or if you need to adjust code. - -Every examples includes an implementation and an unit test. The examples are very simple and lightweight. No further configuration is needed to build and run it. Though, for this reason, the generated models are also included (and increase the download size of the project). 
- -The unit tests use some Kafka helper classes like EmbeddedSingleNodeKafkaCluster in package **com.github.megachucky.kafka.streams.machinelearning.test.utils** so that you can run it without any other configuration or Kafka setup. -If you want to run an implementation of a main class in package **com.github.megachucky.kafka.streams.machinelearning**, you need to start a Kafka cluster (with at least one Zookeeper and one Kafka broker running) and also create the required topics. So check out the unit tests first. - +General info in main [Readme](../readme.md) ### Example 1 - Gradient Boosting with H2O.ai for Prediction of Flight Delays @@ -107,61 +51,6 @@ You can easily test this by yourself. Here are the steps: The project includes another example with similar code to use a [H2O Deep Learning model](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java) instead of H2O GBM Model: [Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java) This shows how you can easily test or replace different analytic models for one use case, or even use them for A/B testing. -### Example 2 - Convolutional Neural Network (CNN) with TensorFlow for Image Recognition -**Use Case** - -Convolutional Neural Network (CNN) to for image recognition. -A prebuilt TensorFlow CNN model is instantiated and used in a Kafka Streams application to do recognize new JPEG images. 
A Kafka Input Topic receives the location of a new images (another option would be to send the image in the Kafka message instead of just a link to it), infers the content of the picture via the TensorFlow model, and sends the result to a Kafka Output Topic. - -**Machine Learning Technology** -* [TensorFlow](https://www.tensorflow.org/) -* Leverages [TensorFlow for Java](https://www.tensorflow.org/install/install_java). These APIs are particularly well-suited for loading models created in Python and executing them within a Java application. Please note: The Java API doesn't yet include convenience functions (which you might know from [Keras](https://keras.io/)), thus a private helper class is used in the example for construction and execution of the pre-built TensorFlow model. -* Check the official TensorFlow demo [LabelImage](https://github.com/kaiwaehner/tensorflow/blob/r1.3/tensorflow/java/src/main/java/org/tensorflow/examples/LabelImage.java) to understand this image recognition example -* You can re-use the pre-trained TensorFlow model attached to this project [tensorflow_inception_graph.pb](http://arxiv.org/abs/1512.00567) or add your own model. -* The 'images' folder contains models which were used for training the model (trained_airplane_1.jpg, trained_airplane_2.jpg, trained_butterfly.jpg) but also a new picture (new_airplane.jpg) which is not known by the model and using a different resolution than the others. Feel free to add your own pictures (they need to be trained, see list of trained pictures in the file: imagenet_comp_graph_label_strings.txt), otherwise the model will return 'unknown'. 
- -**Source Code** - -[Kafka_Streams_TensorFlow_Image_Recognition_Example.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_Example.java) - -**Unit Test** - -[Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java) - -### Example 3 - Iris Prediction using a Neural Network with DeepLearning4J (DL4J) -**Use Case** - -Iris Species Prediction using a Neural Network. -This is a famous example: Prediction of the Iris Species - implemented with many different ML algorithms. Here I use DeepLearning4J (DL4J) to build a neural network using Iris Dataset. - -**Machine Learning Technology** -* [DeepLearning4J](https://deeplearning4j.org) -* Pretty simple example to demo how to build, save and load neural networks with DL4J. [MultiLayerNetwork](https://deeplearning4j.org/doc/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.html) and [INDArray](http://nd4j.org/doc/org/nd4j/linalg/api/ndarray/INDArray.html) are the key APIs to look at if you want to understand the details. -* The model is created via [DeepLearning4J_CSV_Model.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Model.java) and stored in the resources: [DL4J_Iris_Model.zip](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/tree/master/src/main/resources/generatedModels/DL4J). No need to re-train, just for reference. Kudos to Adam Gibson who created this example as part of the DL4J project. 
- -**Unit Test** -[Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java) - -### Example 4 - Python + Keras + TensorFlow + DeepLearning4j - -**Use Case** - -Development of an analytic model trained with Python, Keras and TensorFlow and deployment to Java and Kafka ecosystem. No business case, just a technical demonstration of a simple 'Hello World' Keras model. Feel free to replace the model with any other Keras model trained with your backend of choice. You just need to replace the model binary (and use a model which is compatible with DeepLearning4J 's model importer). - -**Machine Learning Technology** -* [Python](https://www.python.org/) -* [DeepLearning4J](https://deeplearning4j.org) -* [Keras](https://keras.io/) - a high-level neural networks API, written in Python and capable of running on top of TensorFlow, CNTK, or Theano. -* [TensorFlow](https://www.tensorflow.org/) - used as backend under the hood of Keras -* DeepLearning4J 's [KerasModelImport feature](https://deeplearning4j.org/docs/latest/keras-import-overview) is used for importing the Keras / TensorFlow model into Java. The used model is its 'Hello World' model example. -* The Keras model was trained with this [Python script](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/resources/generatedModels/Keras/keras-model-script.py). 
- -**Unit Test** - -[Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java) - - - diff --git a/readme.md b/readme.md index e684ab6..169216a 100644 --- a/readme.md +++ b/readme.md @@ -47,6 +47,8 @@ Java 8 and Maven 3 are required. Maven will download all required dependencies. Just download the project and run mvn clean package + +You can do this in the main directory or in each module separately. Apache Kafka 2.1 is currently used. The code is also compatible with Kafka and Kafka Streams 1.1 and 2.0. @@ -60,107 +62,19 @@ If you want to run an implementation of a main class in package **com.github.meg ### Example 1 - Gradient Boosting with H2O.ai for Prediction of Flight Delays -**Use Case** - -Gradient Boosting Method (GBM) to predict flight delays. -A H2O generated GBM Java model (POJO) is instantiated and used in a Kafka Streams application to do interference on new events. - -**Machine Learning Technology** - -* [H2O](https://www.h2o.ai) -* Check the [H2O demo](https://github.com/h2oai/h2o-2/wiki/Hacking-Airline-DataSet-with-H2O) to understand the test and and how the model was built -* You can re-use the generated Java model attached to this project ([gbm_pojo_test.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/gbm_pojo_test.java)) or build your own model using R, Python, Flow UI or any other technologies supported by H2O framework. 
- -**Source Code** - -[MachineLearning_H2O_Example.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java) - -**Unit Test** - -[MachineLearning_H2O_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java) - -**Manual Testing** - -You can easily test this by yourself. Here are the steps: -- Start Kafka, e.g. with Confluent CLI: - - confluent start kafka -- Create topics AirlineInputTopic and AirlineOutputTopic - - kafka-topics --zookeeper localhost:2181 --create --topic AirlineInputTopic --partitions 3 --replication-factor 1 - - kafka-topics --zookeeper localhost:2181 --create --topic AirlineOutputTopic --partitions 3 --replication-factor 1 -- Start the Kafka Streams app: - - java -cp target/kafka-streams-machine-learning-examples-1.0-SNAPSHOT-jar-with-dependencies.jar com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_MachineLearning_H2O_GBM_Example -- Send messages, e.g. with kafkacat: - - echo -e "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES" | kafkacat -b localhost:9092 -P -t AirlineInputTopic -- Consume predictions: - - kafka-console-consumer --bootstrap-server localhost:9092 --topic AirlineOutputTopic --from-beginning -- Find more details in the unit test... 
- - -**H2O Deep Learning instead of H2O GBM Model** - -The project includes another example with similar code to use a [H2O Deep Learning model](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java) instead of H2O GBM Model: [Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java) -This shows how you can easily test or replace different analytic models for one use case, or even use them for A/B testing. +Detailed info in [h20-gbm](h20-gbm/readme.md) ### Example 2 - Convolutional Neural Network (CNN) with TensorFlow for Image Recognition -**Use Case** - -Convolutional Neural Network (CNN) to for image recognition. -A prebuilt TensorFlow CNN model is instantiated and used in a Kafka Streams application to do recognize new JPEG images. A Kafka Input Topic receives the location of a new images (another option would be to send the image in the Kafka message instead of just a link to it), infers the content of the picture via the TensorFlow model, and sends the result to a Kafka Output Topic. - -**Machine Learning Technology** -* [TensorFlow](https://www.tensorflow.org/) -* Leverages [TensorFlow for Java](https://www.tensorflow.org/install/install_java). These APIs are particularly well-suited for loading models created in Python and executing them within a Java application. Please note: The Java API doesn't yet include convenience functions (which you might know from [Keras](https://keras.io/)), thus a private helper class is used in the example for construction and execution of the pre-built TensorFlow model. 
-* Check the official TensorFlow demo [LabelImage](https://github.com/kaiwaehner/tensorflow/blob/r1.3/tensorflow/java/src/main/java/org/tensorflow/examples/LabelImage.java) to understand this image recognition example -* You can re-use the pre-trained TensorFlow model attached to this project [tensorflow_inception_graph.pb](http://arxiv.org/abs/1512.00567) or add your own model. -* The 'images' folder contains models which were used for training the model (trained_airplane_1.jpg, trained_airplane_2.jpg, trained_butterfly.jpg) but also a new picture (new_airplane.jpg) which is not known by the model and using a different resolution than the others. Feel free to add your own pictures (they need to be trained, see list of trained pictures in the file: imagenet_comp_graph_label_strings.txt), otherwise the model will return 'unknown'. -**Source Code** - -[Kafka_Streams_TensorFlow_Image_Recognition_Example.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_Example.java) - -**Unit Test** - -[Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java) +Detailed info in [tensorflow-image-recognition](tensorflow-image-recognition/readme.md) ### Example 3 - Iris Prediction using a Neural Network with DeepLearning4J (DL4J) -**Use Case** - -Iris Species Prediction using a Neural Network. -This is a famous example: Prediction of the Iris Species - implemented with many different ML algorithms. Here I use DeepLearning4J (DL4J) to build a neural network using Iris Dataset. 
-**Machine Learning Technology** -* [DeepLearning4J](https://deeplearning4j.org) -* Pretty simple example to demo how to build, save and load neural networks with DL4J. [MultiLayerNetwork](https://deeplearning4j.org/doc/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.html) and [INDArray](http://nd4j.org/doc/org/nd4j/linalg/api/ndarray/INDArray.html) are the key APIs to look at if you want to understand the details. -* The model is created via [DeepLearning4J_CSV_Model.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Model.java) and stored in the resources: [DL4J_Iris_Model.zip](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/tree/master/src/main/resources/generatedModels/DL4J). No need to re-train, just for reference. Kudos to Adam Gibson who created this example as part of the DL4J project. - -**Unit Test** -[Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java) +Detailed info in [dl4j-deeplearning-iris](dl4j-deeplearning-iris/readme.md) ### Example 4 - Python + Keras + TensorFlow + DeepLearning4j -**Use Case** - -Development of an analytic model trained with Python, Keras and TensorFlow and deployment to Java and Kafka ecosystem. No business case, just a technical demonstration of a simple 'Hello World' Keras model. Feel free to replace the model with any other Keras model trained with your backend of choice. You just need to replace the model binary (and use a model which is compatible with DeepLearning4J 's model importer). 
- -**Machine Learning Technology** -* [Python](https://www.python.org/) -* [DeepLearning4J](https://deeplearning4j.org) -* [Keras](https://keras.io/) - a high-level neural networks API, written in Python and capable of running on top of TensorFlow, CNTK, or Theano. -* [TensorFlow](https://www.tensorflow.org/) - used as backend under the hood of Keras -* DeepLearning4J 's [KerasModelImport feature](https://deeplearning4j.org/docs/latest/keras-import-overview) is used for importing the Keras / TensorFlow model into Java. The used model is its 'Hello World' model example. -* The Keras model was trained with this [Python script](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/resources/generatedModels/Keras/keras-model-script.py). - -**Unit Test** - -[Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java) - - +Detailed info in [tensorflow-keras](tensorflow-keras/readme.md) diff --git a/tensorflow-image-recognition/readme.md b/tensorflow-image-recognition/readme.md index e684ab6..8219e92 100644 --- a/tensorflow-image-recognition/readme.md +++ b/tensorflow-image-recognition/readme.md @@ -1,111 +1,6 @@ # Machine Learning + Kafka Streams Examples -This project contains **examples which demonstrate how to deploy analytic models to mission-critical, scalable production leveraging [Apache Kafka](https://kafka.apache.org/) and its [Streams API](docs.confluent.io/current/streams/index.html).** -Examples will include analytic models built with TensorFlow, Keras, H2O, Python, DeepLearning4J and other technologies. 
- -![Kafka Open Source Ecosystem for a Scalable Mission Critical Machine Learning Infrastructure](http://www.kai-waehner.de/blog/wp-content/uploads/2017/10/Apache_Kafka_Ecosystem_Kafka_Streams_Machine_Learning.png "Kafka Open Source Ecosystem for a Scalable Mission Critical Machine Learning Infrastructure") - - -## Material (Blogs Posts, Slides, Videos) -Here is some material about this topic if you want to read and listen to the theory instead of just doing hands-on: -- Blog Post: [How to Build and Deploy Scalable Machine Learning in Production with Apache Kafka](https://www.confluent.io/blog/build-deploy-scalable-machine-learning-production-apache-kafka/) -- Slide Deck: [Apache Kafka + Machine Learning => Intelligent Real Time Applications](https://www.slideshare.net/KaiWaehner/apache-kafka-streams-machine-learning-deep-learning) -- Slide Deck: [Deep Learning at Extreme Scale (in the Cloud) 
with the Apache Kafka Open Source Ecosystem](https://www.slideshare.net/KaiWaehner/deep-learning-at-extreme-scale-in-the-cloud-with-the-apache-kafka-open-source-ecosystem) -- Video Recording: [Deep Learning in Mission Critical and Scalable Real Time Applications with Open Source Frameworks](https://vimeo.com/jaxtv/review/256406763/7fbf4213be) -- Blog Post: [Using Apache Kafka to Drive Cutting-Edge Machine Learning - Hybrid ML Architectures, AutoML, and more...](https://www.confluent.io/blog/using-apache-kafka-drive-cutting-edge-machine-learning) -- Blog Post: [Machine Learning with Python, Jupyter, KSQL and TensorFlow](https://www.confluent.io/blog/machine-learning-with-python-jupyter-ksql-tensorflow) - -## Use Cases and Technologies - -##### The following examples are already available including unit tests: - -* Deployment of a H2O GBM model to a Kafka Streams application for prediction of flight delays -* Deployment of a H2O Deep Learning model to a Kafka Streams application for prediction of flight delays -* Deployment of a pre-built TensorFlow CNN model for image recognition -* Deployment of a DL4J model to predict the species of Iris flowers -* Deployment of a Keras model (trained with TensorFlow backend) using the Import Model API from DeepLearning4J - -**More sophisticated use cases around Kafka Streams and other technologies will be added over time in this or related Github project. Some ideas**: - -* Image Recognition with H2O and TensorFlow (to show the difference of using H2O instead of using just low level TensorFlow APIs) -* Anomaly Detection with Autoencoders leveraging DeepLearning4J. -* Cross Selling and Customer Churn Detection using classical Machine Learning algorithms but also Deep Learning -* Stateful Stream Processing to combine different model execution steps into a more powerful workflow instead of "just" inferencing single events (a good example might be a streaming process with sliding or session windows). 
-* Keras to build different models with Python, TensorFlow, Theano and other Deep Learning frameworks under the hood + Kafka Streams as generic Machine Learning infrastructure to deploy, execute and monitor these different models. - -##### Some other Github projects exist already with more ML + Kafka content: - -- Deep Learning UDF for KSQL: [Streaming Anomaly Detection of MQTT IoT Sensor Data using an Autoencoder](https://github.com/kaiwaehner/ksql-udf-deep-learning-mqtt-iot) -- End-to-End ML Integration Demo: [Continuous Health Checks with Anomaly Detection using KSQL, Kafka Connect, Deep Learning and Elasticsearch](https://github.com/kaiwaehner/ksql-fork-with-deep-learning-function) -- TensorFlow Serving + gRPC + Kafka Streams on Github => Stream Processing and RPC / Request-Response concepts combined: [Model inference with Apache Kafka, Kafka Streams and a TensorFlow model deployed on a TensorFlow Serving model server](https://github.com/kaiwaehner/tensorflow-serving-java-grpc-kafka-streams) - -## Requirements, Installation and Usage -The code is developed and tested on Mac and Linux operating systems. As Kafka does not support and work well on Windows, this is not tested at all. - -Java 8 and Maven 3 are required. Maven will download all required dependencies. - -Just download the project and run - - mvn clean package - -Apache Kafka 2.1 is currently used. The code is also compatible with Kafka and Kafka Streams 1.1 and 2.0. - -**Please make sure to run the Maven build without any changes first.** If it works without errors, you can change library versions, Java version, etc. and see if it still works or if you need to adjust code. - -Every examples includes an implementation and an unit test. The examples are very simple and lightweight. No further configuration is needed to build and run it. Though, for this reason, the generated models are also included (and increase the download size of the project). 
- -The unit tests use some Kafka helper classes like EmbeddedSingleNodeKafkaCluster in package **com.github.megachucky.kafka.streams.machinelearning.test.utils** so that you can run it without any other configuration or Kafka setup. -If you want to run an implementation of a main class in package **com.github.megachucky.kafka.streams.machinelearning**, you need to start a Kafka cluster (with at least one Zookeeper and one Kafka broker running) and also create the required topics. So check out the unit tests first. - - -### Example 1 - Gradient Boosting with H2O.ai for Prediction of Flight Delays - -**Use Case** - -Gradient Boosting Method (GBM) to predict flight delays. -A H2O generated GBM Java model (POJO) is instantiated and used in a Kafka Streams application to do interference on new events. - -**Machine Learning Technology** - -* [H2O](https://www.h2o.ai) -* Check the [H2O demo](https://github.com/h2oai/h2o-2/wiki/Hacking-Airline-DataSet-with-H2O) to understand the test and and how the model was built -* You can re-use the generated Java model attached to this project ([gbm_pojo_test.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/gbm_pojo_test.java)) or build your own model using R, Python, Flow UI or any other technologies supported by H2O framework. 
- -**Source Code** - -[MachineLearning_H2O_Example.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java) - -**Unit Test** - -[MachineLearning_H2O_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java) - -**Manual Testing** - -You can easily test this by yourself. Here are the steps: -- Start Kafka, e.g. with Confluent CLI: - - confluent start kafka -- Create topics AirlineInputTopic and AirlineOutputTopic - - kafka-topics --zookeeper localhost:2181 --create --topic AirlineInputTopic --partitions 3 --replication-factor 1 - - kafka-topics --zookeeper localhost:2181 --create --topic AirlineOutputTopic --partitions 3 --replication-factor 1 -- Start the Kafka Streams app: - - java -cp target/kafka-streams-machine-learning-examples-1.0-SNAPSHOT-jar-with-dependencies.jar com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_MachineLearning_H2O_GBM_Example -- Send messages, e.g. with kafkacat: - - echo -e "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES" | kafkacat -b localhost:9092 -P -t AirlineInputTopic -- Consume predictions: - - kafka-console-consumer --bootstrap-server localhost:9092 --topic AirlineOutputTopic --from-beginning -- Find more details in the unit test... 
- - -**H2O Deep Learning instead of H2O GBM Model** - -The project includes another example with similar code to use a [H2O Deep Learning model](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java) instead of H2O GBM Model: [Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java) -This shows how you can easily test or replace different analytic models for one use case, or even use them for A/B testing. +General info in main [Readme](../readme.md) ### Example 2 - Convolutional Neural Network (CNN) with TensorFlow for Image Recognition **Use Case** @@ -128,41 +23,3 @@ A prebuilt TensorFlow CNN model is instantiated and used in a Kafka Streams appl [Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java) -### Example 3 - Iris Prediction using a Neural Network with DeepLearning4J (DL4J) -**Use Case** - -Iris Species Prediction using a Neural Network. -This is a famous example: Prediction of the Iris Species - implemented with many different ML algorithms. Here I use DeepLearning4J (DL4J) to build a neural network using Iris Dataset. - -**Machine Learning Technology** -* [DeepLearning4J](https://deeplearning4j.org) -* Pretty simple example to demo how to build, save and load neural networks with DL4J. 
[MultiLayerNetwork](https://deeplearning4j.org/doc/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.html) and [INDArray](http://nd4j.org/doc/org/nd4j/linalg/api/ndarray/INDArray.html) are the key APIs to look at if you want to understand the details. -* The model is created via [DeepLearning4J_CSV_Model.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Model.java) and stored in the resources: [DL4J_Iris_Model.zip](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/tree/master/src/main/resources/generatedModels/DL4J). No need to re-train, just for reference. Kudos to Adam Gibson who created this example as part of the DL4J project. - -**Unit Test** -[Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java) - -### Example 4 - Python + Keras + TensorFlow + DeepLearning4j - -**Use Case** - -Development of an analytic model trained with Python, Keras and TensorFlow and deployment to Java and Kafka ecosystem. No business case, just a technical demonstration of a simple 'Hello World' Keras model. Feel free to replace the model with any other Keras model trained with your backend of choice. You just need to replace the model binary (and use a model which is compatible with DeepLearning4J 's model importer). - -**Machine Learning Technology** -* [Python](https://www.python.org/) -* [DeepLearning4J](https://deeplearning4j.org) -* [Keras](https://keras.io/) - a high-level neural networks API, written in Python and capable of running on top of TensorFlow, CNTK, or Theano. 
-* [TensorFlow](https://www.tensorflow.org/) - used as backend under the hood of Keras -* DeepLearning4J 's [KerasModelImport feature](https://deeplearning4j.org/docs/latest/keras-import-overview) is used for importing the Keras / TensorFlow model into Java. The used model is its 'Hello World' model example. -* The Keras model was trained with this [Python script](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/resources/generatedModels/Keras/keras-model-script.py). - -**Unit Test** - -[Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java) - - - - - - - diff --git a/tensorflow-keras/readme.md b/tensorflow-keras/readme.md index e684ab6..a3d725f 100644 --- a/tensorflow-keras/readme.md +++ b/tensorflow-keras/readme.md @@ -1,146 +1,6 @@ # Machine Learning + Kafka Streams Examples -This project contains **examples which demonstrate how to deploy analytic models to mission-critical, scalable production leveraging [Apache Kafka](https://kafka.apache.org/) and its [Streams API](docs.confluent.io/current/streams/index.html).** -Examples will include analytic models built with TensorFlow, Keras, H2O, Python, DeepLearning4J and other technologies. 
- -![Kafka Open Source Ecosystem for a Scalable Mission Critical Machine Learning Infrastructure](http://www.kai-waehner.de/blog/wp-content/uploads/2017/10/Apache_Kafka_Ecosystem_Kafka_Streams_Machine_Learning.png "Kafka Open Source Ecosystem for a Scalable Mission Critical Machine Learning Infrastructure") - - -## Material (Blogs Posts, Slides, Videos) -Here is some material about this topic if you want to read and listen to the theory instead of just doing hands-on: -- Blog Post: [How to Build and Deploy Scalable Machine Learning in Production with Apache Kafka](https://www.confluent.io/blog/build-deploy-scalable-machine-learning-production-apache-kafka/) -- Slide Deck: [Apache Kafka + Machine Learning => Intelligent Real Time Applications](https://www.slideshare.net/KaiWaehner/apache-kafka-streams-machine-learning-deep-learning) -- Slide Deck: [Deep Learning at Extreme Scale (in the Cloud) 
with the Apache Kafka Open Source Ecosystem](https://www.slideshare.net/KaiWaehner/deep-learning-at-extreme-scale-in-the-cloud-with-the-apache-kafka-open-source-ecosystem) -- Video Recording: [Deep Learning in Mission Critical and Scalable Real Time Applications with Open Source Frameworks](https://vimeo.com/jaxtv/review/256406763/7fbf4213be) -- Blog Post: [Using Apache Kafka to Drive Cutting-Edge Machine Learning - Hybrid ML Architectures, AutoML, and more...](https://www.confluent.io/blog/using-apache-kafka-drive-cutting-edge-machine-learning) -- Blog Post: [Machine Learning with Python, Jupyter, KSQL and TensorFlow](https://www.confluent.io/blog/machine-learning-with-python-jupyter-ksql-tensorflow) - -## Use Cases and Technologies - -##### The following examples are already available including unit tests: - -* Deployment of a H2O GBM model to a Kafka Streams application for prediction of flight delays -* Deployment of a H2O Deep Learning model to a Kafka Streams application for prediction of flight delays -* Deployment of a pre-built TensorFlow CNN model for image recognition -* Deployment of a DL4J model to predict the species of Iris flowers -* Deployment of a Keras model (trained with TensorFlow backend) using the Import Model API from DeepLearning4J - -**More sophisticated use cases around Kafka Streams and other technologies will be added over time in this or related Github project. Some ideas**: - -* Image Recognition with H2O and TensorFlow (to show the difference of using H2O instead of using just low level TensorFlow APIs) -* Anomaly Detection with Autoencoders leveraging DeepLearning4J. -* Cross Selling and Customer Churn Detection using classical Machine Learning algorithms but also Deep Learning -* Stateful Stream Processing to combine different model execution steps into a more powerful workflow instead of "just" inferencing single events (a good example might be a streaming process with sliding or session windows). 
-* Keras to build different models with Python, TensorFlow, Theano and other Deep Learning frameworks under the hood + Kafka Streams as generic Machine Learning infrastructure to deploy, execute and monitor these different models. - -##### Some other Github projects exist already with more ML + Kafka content: - -- Deep Learning UDF for KSQL: [Streaming Anomaly Detection of MQTT IoT Sensor Data using an Autoencoder](https://github.com/kaiwaehner/ksql-udf-deep-learning-mqtt-iot) -- End-to-End ML Integration Demo: [Continuous Health Checks with Anomaly Detection using KSQL, Kafka Connect, Deep Learning and Elasticsearch](https://github.com/kaiwaehner/ksql-fork-with-deep-learning-function) -- TensorFlow Serving + gRPC + Kafka Streams on Github => Stream Processing and RPC / Request-Response concepts combined: [Model inference with Apache Kafka, Kafka Streams and a TensorFlow model deployed on a TensorFlow Serving model server](https://github.com/kaiwaehner/tensorflow-serving-java-grpc-kafka-streams) - -## Requirements, Installation and Usage -The code is developed and tested on Mac and Linux operating systems. As Kafka does not support and work well on Windows, this is not tested at all. - -Java 8 and Maven 3 are required. Maven will download all required dependencies. - -Just download the project and run - - mvn clean package - -Apache Kafka 2.1 is currently used. The code is also compatible with Kafka and Kafka Streams 1.1 and 2.0. - -**Please make sure to run the Maven build without any changes first.** If it works without errors, you can change library versions, Java version, etc. and see if it still works or if you need to adjust code. - -Every examples includes an implementation and an unit test. The examples are very simple and lightweight. No further configuration is needed to build and run it. Though, for this reason, the generated models are also included (and increase the download size of the project). 
- -The unit tests use some Kafka helper classes like EmbeddedSingleNodeKafkaCluster in package **com.github.megachucky.kafka.streams.machinelearning.test.utils** so that you can run it without any other configuration or Kafka setup. -If you want to run an implementation of a main class in package **com.github.megachucky.kafka.streams.machinelearning**, you need to start a Kafka cluster (with at least one Zookeeper and one Kafka broker running) and also create the required topics. So check out the unit tests first. - - -### Example 1 - Gradient Boosting with H2O.ai for Prediction of Flight Delays - -**Use Case** - -Gradient Boosting Method (GBM) to predict flight delays. -A H2O generated GBM Java model (POJO) is instantiated and used in a Kafka Streams application to do interference on new events. - -**Machine Learning Technology** - -* [H2O](https://www.h2o.ai) -* Check the [H2O demo](https://github.com/h2oai/h2o-2/wiki/Hacking-Airline-DataSet-with-H2O) to understand the test and and how the model was built -* You can re-use the generated Java model attached to this project ([gbm_pojo_test.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/gbm_pojo_test.java)) or build your own model using R, Python, Flow UI or any other technologies supported by H2O framework. 
- -**Source Code** - -[MachineLearning_H2O_Example.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java) - -**Unit Test** - -[MachineLearning_H2O_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java) - -**Manual Testing** - -You can easily test this by yourself. Here are the steps: -- Start Kafka, e.g. with Confluent CLI: - - confluent start kafka -- Create topics AirlineInputTopic and AirlineOutputTopic - - kafka-topics --zookeeper localhost:2181 --create --topic AirlineInputTopic --partitions 3 --replication-factor 1 - - kafka-topics --zookeeper localhost:2181 --create --topic AirlineOutputTopic --partitions 3 --replication-factor 1 -- Start the Kafka Streams app: - - java -cp target/kafka-streams-machine-learning-examples-1.0-SNAPSHOT-jar-with-dependencies.jar com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_MachineLearning_H2O_GBM_Example -- Send messages, e.g. with kafkacat: - - echo -e "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES" | kafkacat -b localhost:9092 -P -t AirlineInputTopic -- Consume predictions: - - kafka-console-consumer --bootstrap-server localhost:9092 --topic AirlineOutputTopic --from-beginning -- Find more details in the unit test... 
- - -**H2O Deep Learning instead of H2O GBM Model** - -The project includes another example with similar code to use a [H2O Deep Learning model](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java) instead of H2O GBM Model: [Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java) -This shows how you can easily test or replace different analytic models for one use case, or even use them for A/B testing. - -### Example 2 - Convolutional Neural Network (CNN) with TensorFlow for Image Recognition -**Use Case** - -Convolutional Neural Network (CNN) to for image recognition. -A prebuilt TensorFlow CNN model is instantiated and used in a Kafka Streams application to do recognize new JPEG images. A Kafka Input Topic receives the location of a new images (another option would be to send the image in the Kafka message instead of just a link to it), infers the content of the picture via the TensorFlow model, and sends the result to a Kafka Output Topic. - -**Machine Learning Technology** -* [TensorFlow](https://www.tensorflow.org/) -* Leverages [TensorFlow for Java](https://www.tensorflow.org/install/install_java). These APIs are particularly well-suited for loading models created in Python and executing them within a Java application. Please note: The Java API doesn't yet include convenience functions (which you might know from [Keras](https://keras.io/)), thus a private helper class is used in the example for construction and execution of the pre-built TensorFlow model. 
-* Check the official TensorFlow demo [LabelImage](https://github.com/kaiwaehner/tensorflow/blob/r1.3/tensorflow/java/src/main/java/org/tensorflow/examples/LabelImage.java) to understand this image recognition example -* You can re-use the pre-trained TensorFlow model attached to this project [tensorflow_inception_graph.pb](http://arxiv.org/abs/1512.00567) or add your own model. -* The 'images' folder contains models which were used for training the model (trained_airplane_1.jpg, trained_airplane_2.jpg, trained_butterfly.jpg) but also a new picture (new_airplane.jpg) which is not known by the model and using a different resolution than the others. Feel free to add your own pictures (they need to be trained, see list of trained pictures in the file: imagenet_comp_graph_label_strings.txt), otherwise the model will return 'unknown'. - -**Source Code** - -[Kafka_Streams_TensorFlow_Image_Recognition_Example.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_Example.java) - -**Unit Test** - -[Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java) - -### Example 3 - Iris Prediction using a Neural Network with DeepLearning4J (DL4J) -**Use Case** - -Iris Species Prediction using a Neural Network. -This is a famous example: Prediction of the Iris Species - implemented with many different ML algorithms. Here I use DeepLearning4J (DL4J) to build a neural network using Iris Dataset. - -**Machine Learning Technology** -* [DeepLearning4J](https://deeplearning4j.org) -* Pretty simple example to demo how to build, save and load neural networks with DL4J. 
[MultiLayerNetwork](https://deeplearning4j.org/doc/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.html) and [INDArray](http://nd4j.org/doc/org/nd4j/linalg/api/ndarray/INDArray.html) are the key APIs to look at if you want to understand the details. -* The model is created via [DeepLearning4J_CSV_Model.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Model.java) and stored in the resources: [DL4J_Iris_Model.zip](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/tree/master/src/main/resources/generatedModels/DL4J). No need to re-train, just for reference. Kudos to Adam Gibson who created this example as part of the DL4J project. - -**Unit Test** -[Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java) +General info in main [Readme](../readme.md) ### Example 4 - Python + Keras + TensorFlow + DeepLearning4j From 07060667de15fc82898777141643b40c1630489c Mon Sep 17 00:00:00 2001 From: Jukka Karvanen <48978068+jukkakarvanen@users.noreply.github.com> Date: Fri, 29 Mar 2019 12:56:43 +0200 Subject: [PATCH 10/20] Removed unnecessary files --- src/.DS_Store | Bin 6148 -> 0 bytes src/main/.DS_Store | Bin 6148 -> 0 bytes src/main/java/.DS_Store | Bin 6148 -> 0 bytes src/main/java/com/.DS_Store | Bin 6148 -> 0 bytes src/main/java/com/github/.DS_Store | Bin 6148 -> 0 bytes src/main/java/com/github/megachucky/.DS_Store | Bin 6148 -> 0 bytes .../com/github/megachucky/kafka/.DS_Store | Bin 6148 -> 0 bytes .../github/megachucky/kafka/streams/.DS_Store | Bin 6148 -> 0 bytes .../utils/TestEmbeddedKafkaCluster.java | 45 ----------------- .../integration/utils/TestKafkaStreams.java | 47 
------------------ 10 files changed, 92 deletions(-) delete mode 100644 src/.DS_Store delete mode 100644 src/main/.DS_Store delete mode 100644 src/main/java/.DS_Store delete mode 100644 src/main/java/com/.DS_Store delete mode 100644 src/main/java/com/github/.DS_Store delete mode 100644 src/main/java/com/github/megachucky/.DS_Store delete mode 100644 src/main/java/com/github/megachucky/kafka/.DS_Store delete mode 100644 src/main/java/com/github/megachucky/kafka/streams/.DS_Store delete mode 100644 src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java delete mode 100644 src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java diff --git a/src/.DS_Store b/src/.DS_Store deleted file mode 100644 index d549e6c5577d641630f75c73536586f14fb74385..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKJ5EDE47ABBBGIHwxmVx@t03SAO-%F0zRxa>lL0Z41Ihw+$ZCa@h6X)Lo@aaP z8SN|9V*s`}A0B}EG1^C)retP&UmZ5UN|LYIV^7GId!wugko_!<1Nx*JyEL| z5Cd}tZqr?R|9_(XGXKwu@UIxKVf(z@@RO>yEH1@V-^m;4I%_5-~tF3K^+i#j?VMXLSaS~dY0@jc51bKLsN^0 z?w;4J$RHv;+$b9h6I0}!Tx68{^>MkK$MI$*w?)zl@IfZ~xlK?3DnJFO02QDDGg2TA z@_sd=XX2w!0V*&L1?>A!;KrJ4LH~3h_y_tMAK>5S|49o|DnJGPOaWc) zkNX{7D$mxB*R%R5tF~@%&@V@L`w2i|NAVi&hW%m-uqIm&6&Qa6Tm}Xz@KXg|*oF~o diff --git a/src/main/java/com/.DS_Store b/src/main/java/com/.DS_Store deleted file mode 100644 index eefea06944c2a61b5c210a7e3dab83e5f0fbbd4e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKI|@QE5ZqM}!N$@uSMUZw^aNf&R8T>%Q2SSTE|2Eyr%;}DS}3!ynPjt*kSX52 z6%n0YHWQJNhz#I{aUcb~vR?;`+n25E?9+IpPT+5%s~E$0R|!`vwtyc`3)9AjbSca;(5S$-6}SU~)D@Ee diff --git a/src/main/java/com/github/.DS_Store b/src/main/java/com/github/.DS_Store deleted file mode 100644 index b1a0c32adc84a12b53bcba33fe746b734e4c2040..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 
zcmeHKJ5EDE3>=db5i}_&_X^x#6@?RUf&36i1f@VJ(!GjvaWuxCg6KgPnrJN9v+MQj z)0^V?8GtR0y9ZznU`BVur-!lmx%90#ZNvqwE1g6=AfdoikL||U?wVnS5__zLlw1p`N zkief2(55}Md%RSftsk#v^;1-BT;Nb&jxhKMKw?Mn2JVLWWCLgpEmcv0@khWhFp$7c G3A_P|?h|DI diff --git a/src/main/java/com/github/megachucky/kafka/.DS_Store b/src/main/java/com/github/megachucky/kafka/.DS_Store deleted file mode 100644 index 83bd0788a972c32bd69f6fdd16ea4f10b011d840..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKJ5EC}5S)cbL`st~rLVvZtSC7_E&wESL6LaSzlw8lw9I}Aq6b|lK(o?%?DdW< zPx1CG09(Hw?tvwMIo%N-K1|K`-6wWa5hK!h#v@)Z9ESbz=`_jyKH%It40yvUp1<-> zN8gf30VyB_q<|EV0#{a`3haDw<#TnM6p#XcUjhF&5}w?s#S|&NoTxOUN0OIlMbuq!|GBXM diff --git a/src/main/java/com/github/megachucky/kafka/streams/.DS_Store b/src/main/java/com/github/megachucky/kafka/streams/.DS_Store deleted file mode 100644 index 01b5ab89f51a0cedaa54896ab79bae4f7c5ad4da..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKF-`+P474FdB#NY@+%NEhRTN&34*&@Qq=QHx`m1;sPh-Y*b2_LZnrJL}XV>fT z>83cJ&CEBSha0oCnJwW&`{FV;-qUAxQ4t5iQRBtl?9m4J{Ve zaDi|b_AyHU8w0>zI3^+j^P~ck>NR3`(h+Z!*9*tQq?_09;q_#%3B}{-sNW*pyeDdv z0#abAz-1oS?Ekm$Kj!}_Nhc{F1+I#&Ya9Fy&YW*J4f~*AiFOQ( ic8mkt@pU9+UgIA3y>LtnI^sbG>Sutu$fUr3EARst`xk=% diff --git a/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java b/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java deleted file mode 100644 index d3c7b18..0000000 --- a/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java +++ /dev/null @@ -1,45 +0,0 @@ -package com.github.jukkakarvanen.kafka.streams.integration.utils; - -import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.Properties; - -/** This is helper class to workaround for Failing stream tests in 
Windows environment KAFKA-6647. - * - * @author Jukka Karvanen - * - * The causing issue is https://issues.apache.org/jira/browse/KAFKA-6647 - * Replacing EmbeddedKafkaCluster with TestEmbeddedKafkaCluster will catch and ignore the exception - * happening during the tear down of the test - * The exception does not have affect to functionality - */ - -public class TestEmbeddedKafkaCluster extends EmbeddedKafkaCluster { - private static final Logger log = LoggerFactory.getLogger(TestEmbeddedKafkaCluster.class); - - public TestEmbeddedKafkaCluster(int numBrokers) { - super(numBrokers); - } - - public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig) { - super(numBrokers, brokerConfig); - } - - public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig, long mockTimeMillisStart) { - super(numBrokers, brokerConfig, mockTimeMillisStart); - } - - public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig, long mockTimeMillisStart, long mockTimeNanoStart) { - super(numBrokers, brokerConfig, mockTimeMillisStart, mockTimeNanoStart); - } - - public void after() { - try { - super.after(); - } catch (RuntimeException e) { - log.warn("Ignoring exception, test failing in Windows due this exception {}", e); - } - } -} diff --git a/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java b/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java deleted file mode 100644 index ef2bc4e..0000000 --- a/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java +++ /dev/null @@ -1,47 +0,0 @@ -package com.github.jukkakarvanen.kafka.streams.integration.utils; - -import org.apache.kafka.common.utils.Time; -import org.apache.kafka.streams.KafkaClientSupplier; -import org.apache.kafka.streams.KafkaStreams; -import org.apache.kafka.streams.Topology; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.Properties; - -/** 
This is helper class to workaround for Failing stream tests in Windows environment KAFKA-6647. - * - * @author Jukka Karvanen - * - * The causing issue is https://issues.apache.org/jira/browse/KAFKA-6647 - * Replacing KafkaStreams with TestKafkaStreams will catch and ignore the exception caused by cleanUp - * The exception does not have affect to functionality - */ - -public class TestKafkaStreams extends KafkaStreams { - private static final Logger log = LoggerFactory.getLogger(TestKafkaStreams.class); - - public TestKafkaStreams(Topology topology, Properties props) { - super(topology, props); - } - - public TestKafkaStreams(Topology topology, Properties props, KafkaClientSupplier clientSupplier) { - super(topology, props, clientSupplier); - } - - public TestKafkaStreams(Topology topology, Properties props, Time time) { - super(topology, props, time); - } - - public TestKafkaStreams(Topology topology, Properties props, KafkaClientSupplier clientSupplier, Time time) { - super(topology, props, clientSupplier, time); - } - - public void cleanUp() { - try { - super.cleanUp(); - } catch (RuntimeException e) { - log.warn("Ignoring exception, test failing in Windows due this exception {}", e); - } - } -} From f13c9eb94c83a8b19e7524f297dc3c4c16c3bc21 Mon Sep 17 00:00:00 2001 From: Jukka Karvanen <48978068+jukkakarvanen@users.noreply.github.com> Date: Mon, 1 Apr 2019 09:23:09 +0300 Subject: [PATCH 11/20] Kafka_Streams_MachineLearning_H2O_GBM_Example_Refactored class as baseline for parent class --- ....java => Kafka_Streams_MachineLearning_H2O_Application.java} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/{Kafka_Streams_MachineLearning_H2O_GBM_Example_Refactored.java => Kafka_Streams_MachineLearning_H2O_Application.java} (98%) diff --git a/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example_Refactored.java 
b/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_Application.java similarity index 98% rename from h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example_Refactored.java rename to h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_Application.java index eeed681..fa6693d 100644 --- a/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example_Refactored.java +++ b/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_Application.java @@ -24,7 +24,7 @@ * "AirlineOutputTopic". * */ -public class Kafka_Streams_MachineLearning_H2O_GBM_Example_Refactored { +public class Kafka_Streams_MachineLearning_H2O_Application { // Name of the generated H2O model private static String modelClassName = "com.github.megachucky.kafka.streams.machinelearning.models.gbm_pojo_test"; From 1b464e4dffdc9cd9411112376f4a972c48ef05f3 Mon Sep 17 00:00:00 2001 From: Jukka Karvanen <48978068+jukkakarvanen@users.noreply.github.com> Date: Mon, 1 Apr 2019 09:49:59 +0300 Subject: [PATCH 12/20] Unit test Added. 
Duplicate code eliminated with common parent class, Refacted class removed (used as baseline for Application) --- h20-gbm/pom.xml | 7 ++ ...reams_MachineLearning_H2O_Application.java | 99 ++++++++------- ...hineLearning_H2O_DeepLearning_Example.java | 114 +----------------- ...reams_MachineLearning_H2O_GBM_Example.java | 112 +---------------- ...Learning_H2O_DeepLearning_ExampleTest.java | 85 +++++++++++++ ...s_MachineLearning_H2O_GBM_ExampleTest.java | 85 +++++++++++++ 6 files changed, 234 insertions(+), 268 deletions(-) create mode 100644 h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java create mode 100644 h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java diff --git a/h20-gbm/pom.xml b/h20-gbm/pom.xml index 33f682f..ab7a62f 100644 --- a/h20-gbm/pom.xml +++ b/h20-gbm/pom.xml @@ -44,6 +44,13 @@ + + org.apache.kafka + kafka-streams-test-utils + ${kafka.version} + test + + junit junit diff --git a/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_Application.java b/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_Application.java index fa6693d..9dd014a 100644 --- a/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_Application.java +++ b/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_Application.java @@ -1,22 +1,22 @@ package com.github.megachucky.kafka.streams.machinelearning; -import java.util.Properties; - +import hex.genmodel.easy.EasyPredictModelWrapper; +import hex.genmodel.easy.RowData; +import hex.genmodel.easy.exception.PredictException; +import hex.genmodel.easy.prediction.BinomialModelPrediction; import org.apache.kafka.common.serialization.Serdes; import 
org.apache.kafka.streams.KafkaStreams; import org.apache.kafka.streams.StreamsBuilder; import org.apache.kafka.streams.StreamsConfig; +import org.apache.kafka.streams.Topology; import org.apache.kafka.streams.kstream.KStream; import org.apache.kafka.streams.kstream.ValueMapper; -import hex.genmodel.easy.EasyPredictModelWrapper; -import hex.genmodel.easy.RowData; -import hex.genmodel.easy.exception.PredictException; -import hex.genmodel.easy.prediction.BinomialModelPrediction; +import java.util.Properties; /** * @author Kai Waehner (www.kai-waehner.de) - * + * * Creates a new Kafka Streams application for prediction of flight * delays The application uses the GBM model "gbm_pojo_test" (built with * H2O.ai) to infer messages sent to Kafka topic "AirlineInputTopic". @@ -26,26 +26,35 @@ */ public class Kafka_Streams_MachineLearning_H2O_Application { - // Name of the generated H2O model - private static String modelClassName = "com.github.megachucky.kafka.streams.machinelearning.models.gbm_pojo_test"; + public static final String INPUT_TOPIC = "AirlineInputTopic"; + public static final String OUTPUT_TOPIC = "AirlineOutputTopic"; - // Prediction Value - private static String airlineDelayPreduction = "unknown"; - public static void main(final String[] args) throws Exception { + public static void execute(String bootstrapServers, String applictionId, String modelClassName) throws Exception { - // Create H2O object (see gbm_pojo_test.java) - hex.genmodel.GenModel rawModel; - rawModel = (hex.genmodel.GenModel) Class.forName(modelClassName).newInstance(); - EasyPredictModelWrapper model = new EasyPredictModelWrapper(rawModel); + final Properties streamsConfiguration = getStreamConfiguration(bootstrapServers, applictionId); + Topology topology = getStreamTopology(modelClassName); + + // Start Kafka Streams Application to process new incoming messages from Input + // Topic + final KafkaStreams streams = new KafkaStreams(topology, streamsConfiguration); + streams.cleanUp(); + 
streams.start(); + System.out.println("Airline Delay Prediction Microservice is running..."); + System.out.println("Input to Kafka Topic 'AirlineInputTopic'; Output to Kafka Topic 'AirlineOutputTopic'"); + + // Add shutdown hook to respond to SIGTERM and gracefully close Kafka + // Streams + Runtime.getRuntime().addShutdownHook(new Thread(streams::close)); + + } - // Configure Kafka Streams Application - final String bootstrapServers = args.length > 0 ? args[0] : "localhost:9092"; + static Properties getStreamConfiguration(String bootstrapServers, String applicationId) { final Properties streamsConfiguration = new Properties(); // Give the Streams application a unique name. The name must be unique // in the Kafka cluster // against which the application is run. - streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "kafka-streams-h2o-gbm-example"); + streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, applicationId); // Where to find Kafka broker(s). streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); @@ -56,6 +65,14 @@ public static void main(final String[] args) throws Exception { // For illustrative purposes we disable record caches streamsConfiguration.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0); + return streamsConfiguration; + } + + static Topology getStreamTopology(String modelClassName) throws InstantiationException, IllegalAccessException, ClassNotFoundException { + // Create H2O object (see gbm_pojo_test.java) + hex.genmodel.GenModel rawModel; + rawModel = (hex.genmodel.GenModel) Class.forName(modelClassName).newInstance(); + EasyPredictModelWrapper model = new EasyPredictModelWrapper(rawModel); // In the subsequent lines we define the processing topology of the // Streams application. @@ -66,9 +83,12 @@ public static void main(final String[] args) throws Exception { // represent lines of text (for the sake of this example, we ignore // whatever may be stored // in the message keys). 
- final KStream airlineInputLines = builder.stream("AirlineInputTopic"); + final KStream airlineInputLines = builder.stream(INPUT_TOPIC); - ValueMapper myValMapper = value -> { + // Stream Processor (in this case 'mapValues' to add custom logic, i.e. apply + // the analytic model) + KStream transformedMessage = + airlineInputLines.mapValues(value -> { // Year,Month,DayofMonth,DayOfWeek,DepTime,CRSDepTime,ArrTime,CRSArrTime,UniqueCarrier,FlightNum,TailNum,ActualElapsedTime,CRSElapsedTime,AirTime,ArrDelay,DepDelay,Origin,Dest,Distance,TaxiIn,TaxiOut,Cancelled,CancellationCode,Diverted,CarrierDelay,WeatherDelay,NASDelay,SecurityDelay,LateAircraftDelay,IsArrDelayed,IsDepDelayed // value: @@ -99,7 +119,6 @@ public static void main(final String[] args) throws Exception { e.printStackTrace(); } - airlineDelayPreduction = p.label; System.out.println("Label (aka prediction) is flight departure delayed: " + p.label); System.out.print("Class probabilities: "); for (int i = 0; i < p.classProbabilities.length; i++) { @@ -110,38 +129,14 @@ public static void main(final String[] args) throws Exception { } System.out.println(""); System.out.println("#####################"); - + return "Prediction: Is Airline delayed? => " + p.label; } - - return value; - - }; - - // Stream Processor (in this case 'mapValues' to add custom logic, i.e. apply - // the analytic model) - airlineInputLines.mapValues(value -> myValMapper); - - // airlineInputLines.print(); - - // Transform message: Add prediction information - KStream transformedMessage = airlineInputLines - .mapValues(value -> "Prediction: Is Airline delayed? 
=> " + airlineDelayPreduction); + //No prediction + return null; + }); // Send prediction information to Output Topic - transformedMessage.to("AirlineOutputTopic"); - - // Start Kafka Streams Application to process new incoming messages from Input - // Topic - final KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration); - streams.cleanUp(); - streams.start(); - System.out.println("Airline Delay Prediction Microservice is running..."); - System.out.println("Input to Kafka Topic 'AirlineInputTopic'; Output to Kafka Topic 'AirlineOutputTopic'"); - - // Add shutdown hook to respond to SIGTERM and gracefully close Kafka - // Streams - Runtime.getRuntime().addShutdownHook(new Thread(streams::close)); - + transformedMessage.to(OUTPUT_TOPIC); + return builder.build(); } - } diff --git a/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java b/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java index 5b49f56..101dfe0 100644 --- a/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java +++ b/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java @@ -22,122 +22,18 @@ * Kafka topic "AirlineOutputTopic". 
* */ -public class Kafka_Streams_MachineLearning_H2O_DeepLearning_Example { +public class Kafka_Streams_MachineLearning_H2O_DeepLearning_Example extends Kafka_Streams_MachineLearning_H2O_Application { // Name of the generated H2O model - private static String modelClassName = "com.github.megachucky.kafka.streams.machinelearning.models.deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451"; + static String modelClassName = "com.github.megachucky.kafka.streams.machinelearning.models.deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451"; + + static final String APPLICATION_ID = "kafka-streams-h2o-deeplearning-example"; - // Prediction Value - private static String airlineDelayPreduction = "unknown"; - public static void main(final String[] args) throws Exception { - - // Create H2O object (see gbm_pojo_test.java) - hex.genmodel.GenModel rawModel; - rawModel = (hex.genmodel.GenModel) Class.forName(modelClassName).newInstance(); - EasyPredictModelWrapper model = new EasyPredictModelWrapper(rawModel); // Configure Kafka Streams Application final String bootstrapServers = args.length > 0 ? args[0] : "localhost:9092"; - final Properties streamsConfiguration = new Properties(); - // Give the Streams application a unique name. The name must be unique - // in the Kafka cluster - // against which the application is run. - streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "kafka-streams-h2o-deeplearning-example"); - // Where to find Kafka broker(s). - streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); - - - // Specify default (de)serializers for record keys and for record - // values. 
- streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); - streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); - - // For illustrative purposes we disable record caches - streamsConfiguration.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0); - - // In the subsequent lines we define the processing topology of the - // Streams application. - final StreamsBuilder builder = new StreamsBuilder(); - - // Construct a `KStream` from the input topic "AirlineInputTopic", where - // message values - // represent lines of text (for the sake of this example, we ignore - // whatever may be stored - // in the message keys). - final KStream airlineInputLines = builder.stream("AirlineInputTopic"); - - - // Stream Processor (in this case 'foreach' to add custom logic, i.e. apply the analytic model) - airlineInputLines.foreach((key, value) -> { - - // Year,Month,DayofMonth,DayOfWeek,DepTime,CRSDepTime,ArrTime,CRSArrTime,UniqueCarrier,FlightNum,TailNum,ActualElapsedTime,CRSElapsedTime,AirTime,ArrDelay,DepDelay,Origin,Dest,Distance,TaxiIn,TaxiOut,Cancelled,CancellationCode,Diverted,CarrierDelay,WeatherDelay,NASDelay,SecurityDelay,LateAircraftDelay,IsArrDelayed,IsDepDelayed - // value: - // YES, probably delayed: - // 1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES - // NO, probably not delayed: - // 1999,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES - - if (value != null && !value.equals("")) { - System.out.println("#####################"); - System.out.println("Flight Input:" + value); - - String[] valuesArray = value.split(","); - - RowData row = new RowData(); - row.put("Year", valuesArray[0]); - row.put("Month", valuesArray[1]); - row.put("DayofMonth", valuesArray[2]); - row.put("DayOfWeek", valuesArray[3]); - row.put("CRSDepTime", valuesArray[5]); - 
row.put("UniqueCarrier", valuesArray[8]); - row.put("Origin", valuesArray[16]); - row.put("Dest", valuesArray[17]); - BinomialModelPrediction p = null; - try { - p = model.predictBinomial(row); - } catch (PredictException e) { - e.printStackTrace(); - } - - airlineDelayPreduction = p.label; - System.out.println("Label (aka prediction) is flight departure delayed: " + p.label); - System.out.print("Class probabilities: "); - for (int i = 0; i < p.classProbabilities.length; i++) { - if (i > 0) { - System.out.print(","); - } - System.out.print(p.classProbabilities[i]); - } - System.out.println(""); - System.out.println("#####################"); - - - - } - - } - ); - - // airlineInputLines.print(); - - // Transform message: Add prediction information - KStream transformedMessage = airlineInputLines.mapValues(value -> "Prediction: Is Airline delayed? => " + airlineDelayPreduction); - - // Send prediction information to Output Topic - transformedMessage.to("AirlineOutputTopic"); - - // Start Kafka Streams Application to process new incoming messages from Input Topic - final KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration); - streams.cleanUp(); - streams.start(); - System.out.println("Airline Delay Prediction Microservice is running..."); - System.out.println("Input to Kafka Topic 'AirlineInputTopic'; Output to Kafka Topic 'AirlineOutputTopic'"); - - // Add shutdown hook to respond to SIGTERM and gracefully close Kafka - // Streams - Runtime.getRuntime().addShutdownHook(new Thread(streams::close)); - + execute(bootstrapServers, APPLICATION_ID, modelClassName); } } diff --git a/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java b/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java index 695c419..e903145 100644 --- 
a/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java +++ b/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java @@ -6,6 +6,7 @@ import org.apache.kafka.streams.KafkaStreams; import org.apache.kafka.streams.StreamsBuilder; import org.apache.kafka.streams.StreamsConfig; +import org.apache.kafka.streams.Topology; import org.apache.kafka.streams.kstream.KStream; import hex.genmodel.easy.EasyPredictModelWrapper; @@ -23,120 +24,17 @@ * "AirlineOutputTopic". * */ -public class Kafka_Streams_MachineLearning_H2O_GBM_Example { - +public class Kafka_Streams_MachineLearning_H2O_GBM_Example extends Kafka_Streams_MachineLearning_H2O_Application { // Name of the generated H2O model - private static String modelClassName = "com.github.megachucky.kafka.streams.machinelearning.models.gbm_pojo_test"; + static String modelClassName = "com.github.megachucky.kafka.streams.machinelearning.models.gbm_pojo_test"; - // Prediction Value - private static String airlineDelayPreduction = "unknown"; + static final String APPLICATION_ID = "kafka-streams-h2o-gbm-example"; public static void main(final String[] args) throws Exception { - // Create H2O object (see gbm_pojo_test.java) - hex.genmodel.GenModel rawModel; - rawModel = (hex.genmodel.GenModel) Class.forName(modelClassName).newInstance(); - EasyPredictModelWrapper model = new EasyPredictModelWrapper(rawModel); - // Configure Kafka Streams Application final String bootstrapServers = args.length > 0 ? args[0] : "localhost:9092"; - final Properties streamsConfiguration = new Properties(); - // Give the Streams application a unique name. The name must be unique - // in the Kafka cluster - // against which the application is run. - streamsConfiguration.put(StreamsConfig.APPLICATION_ID_CONFIG, "kafka-streams-h2o-gbm-example"); - // Where to find Kafka broker(s). 
- streamsConfiguration.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); - - // Specify default (de)serializers for record keys and for record - // values. - streamsConfiguration.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); - streamsConfiguration.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName()); - - // For illustrative purposes we disable record caches - streamsConfiguration.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0); - - // In the subsequent lines we define the processing topology of the - // Streams application. - final StreamsBuilder builder = new StreamsBuilder(); - - // Construct a `KStream` from the input topic "AirlineInputTopic", where - // message values - // represent lines of text (for the sake of this example, we ignore - // whatever may be stored - // in the message keys). - final KStream airlineInputLines = builder.stream("AirlineInputTopic"); - - // Stream Processor (in this case 'foreach' to add custom logic, i.e. 
apply the - // analytic model) - airlineInputLines.foreach((key, value) -> { - - // Year,Month,DayofMonth,DayOfWeek,DepTime,CRSDepTime,ArrTime,CRSArrTime,UniqueCarrier,FlightNum,TailNum,ActualElapsedTime,CRSElapsedTime,AirTime,ArrDelay,DepDelay,Origin,Dest,Distance,TaxiIn,TaxiOut,Cancelled,CancellationCode,Diverted,CarrierDelay,WeatherDelay,NASDelay,SecurityDelay,LateAircraftDelay,IsArrDelayed,IsDepDelayed - // value: - // YES, probably delayed: - // 1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES - // NO, probably not delayed: - // 1999,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES - - if (value != null && !value.equals("")) { - System.out.println("#####################"); - System.out.println("Flight Input:" + value); - - String[] valuesArray = value.split(","); - - RowData row = new RowData(); - row.put("Year", valuesArray[0]); - row.put("Month", valuesArray[1]); - row.put("DayofMonth", valuesArray[2]); - row.put("DayOfWeek", valuesArray[3]); - row.put("CRSDepTime", valuesArray[5]); - row.put("UniqueCarrier", valuesArray[8]); - row.put("Origin", valuesArray[16]); - row.put("Dest", valuesArray[17]); - BinomialModelPrediction p = null; - try { - p = model.predictBinomial(row); - } catch (PredictException e) { - e.printStackTrace(); - } - - airlineDelayPreduction = p.label; - System.out.println("Label (aka prediction) is flight departure delayed: " + p.label); - System.out.print("Class probabilities: "); - for (int i = 0; i < p.classProbabilities.length; i++) { - if (i > 0) { - System.out.print(","); - } - System.out.print(p.classProbabilities[i]); - } - System.out.println(""); - System.out.println("#####################"); - - } - - }); - - // airlineInputLines.print(); - - // Transform message: Add prediction information - KStream transformedMessage = airlineInputLines - .mapValues(value -> "Prediction: Is Airline delayed? 
=> " + airlineDelayPreduction); - - // Send prediction information to Output Topic - transformedMessage.to("AirlineOutputTopic"); - - // Start Kafka Streams Application to process new incoming messages from Input - // Topic - final KafkaStreams streams = new KafkaStreams(builder.build(), streamsConfiguration); - streams.cleanUp(); - streams.start(); - System.out.println("Airline Delay Prediction Microservice is running..."); - System.out.println("Input to Kafka Topic 'AirlineInputTopic'; Output to Kafka Topic 'AirlineOutputTopic'"); - - // Add shutdown hook to respond to SIGTERM and gracefully close Kafka - // Streams - Runtime.getRuntime().addShutdownHook(new Thread(streams::close)); - + execute(bootstrapServers, APPLICATION_ID, modelClassName); } } diff --git a/h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java b/h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java new file mode 100644 index 0000000..6ef0e52 --- /dev/null +++ b/h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java @@ -0,0 +1,85 @@ +package com.github.megachucky.kafka.streams.machinelearning; + +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.common.serialization.StringDeserializer; +import org.apache.kafka.common.serialization.StringSerializer; +import org.apache.kafka.streams.KeyValue; +import org.apache.kafka.streams.TopologyTestDriver; +import org.apache.kafka.streams.test.ConsumerRecordFactory; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +import static org.assertj.core.api.AssertionsForInterfaceTypes.assertThat; + +/** + * TopologyTestDriver based test about stream processing of 
Kafka_Streams_TensorFlow_Image_Recognition_Example. + * + * @author Jukka Karvanen / jukinimi.com + * * Unit Test of + * {@link Kafka_Streams_MachineLearning_H2O_GBM_Example}, using an + * TopologyTestDriver and a TensorFlow CNN model. + * + */ + +public class Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest { + private TopologyTestDriver testDriver; + + private StringDeserializer stringDeserializer = new StringDeserializer(); + private ConsumerRecordFactory recordFactory = new ConsumerRecordFactory<>(new StringSerializer(), new StringSerializer()); + + @Before + public void setup() throws IllegalAccessException, ClassNotFoundException, InstantiationException { + testDriver = new TopologyTestDriver(Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.getStreamTopology(Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.modelClassName), Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.getStreamConfiguration("localhost:9092",Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.APPLICATION_ID)); + } + + @After + public void tearDown() { + try { + testDriver.close(); + } catch (RuntimeException e) { + // https://issues.apache.org/jira/browse/KAFKA-6647 causes exception when executed in Windows, ignoring it + // Logged stacktrace cannot be avoided + System.out.println("Ignoring exception, test failing in Windows due this exception:" + e.getLocalizedMessage()); + } + } + + private String getOutput() { + ProducerRecord output = testDriver.readOutput(Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.OUTPUT_TOPIC, stringDeserializer, stringDeserializer); + assertThat(output).isNotNull(); + return output.value(); + } + + /** Simple test validating only the prediction part of the output + */ + @Test + public void testOne() { + testDriver.pipeInput(recordFactory.create(Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.INPUT_TOPIC, null, "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES", 
1L)); + assertThat(getOutput()).isEqualTo("Prediction: Is Airline delayed? => YES"); + } + + /** Test based on Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest + * + */ + @Test + public void testList() { + // Flight data (one single flight) --> We want to predict if it will be + // delayed or not + List inputValues = Arrays.asList( + "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES", + "1999,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES"); + List> records = inputValues.stream().map(v -> new KeyValue(null, v)).collect(Collectors.toList()); + + + testDriver.pipeInput(recordFactory.create(Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.INPUT_TOPIC, records, 1L, 100L)); + assertThat(getOutput()).isEqualTo("Prediction: Is Airline delayed? => YES"); + //This model predict also another flight to be delayed + assertThat(getOutput()).isEqualTo("Prediction: Is Airline delayed? 
=> YES"); + } + + +} diff --git a/h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java b/h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java new file mode 100644 index 0000000..bae28f9 --- /dev/null +++ b/h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java @@ -0,0 +1,85 @@ +package com.github.megachucky.kafka.streams.machinelearning; + +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.common.serialization.StringDeserializer; +import org.apache.kafka.common.serialization.StringSerializer; +import org.apache.kafka.streams.KeyValue; +import org.apache.kafka.streams.TopologyTestDriver; +import org.apache.kafka.streams.test.ConsumerRecordFactory; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +import static org.assertj.core.api.AssertionsForInterfaceTypes.assertThat; + +/** + * TopologyTestDriver based test about stream processing of Kafka_Streams_TensorFlow_Image_Recognition_Example. + * + * @author Jukka Karvanen / jukinimi.com + * * Unit Test of + * {@link Kafka_Streams_MachineLearning_H2O_GBM_Example}, using an + * TopologyTestDriver and a TensorFlow CNN model. 
+ * + */ + +public class Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest { + private TopologyTestDriver testDriver; + + private StringDeserializer stringDeserializer = new StringDeserializer(); + private ConsumerRecordFactory recordFactory = new ConsumerRecordFactory<>(new StringSerializer(), new StringSerializer()); + + @Before + public void setup() throws IllegalAccessException, ClassNotFoundException, InstantiationException { + testDriver = new TopologyTestDriver(Kafka_Streams_MachineLearning_H2O_GBM_Example.getStreamTopology(Kafka_Streams_MachineLearning_H2O_GBM_Example.modelClassName), Kafka_Streams_MachineLearning_H2O_GBM_Example.getStreamConfiguration("localhost:9092",Kafka_Streams_MachineLearning_H2O_GBM_Example.APPLICATION_ID)); + } + + @After + public void tearDown() { + try { + testDriver.close(); + } catch (RuntimeException e) { + // https://issues.apache.org/jira/browse/KAFKA-6647 causes exception when executed in Windows, ignoring it + // Logged stacktrace cannot be avoided + System.out.println("Ignoring exception, test failing in Windows due this exception:" + e.getLocalizedMessage()); + } + } + + private String getOutput() { + ProducerRecord output = testDriver.readOutput(Kafka_Streams_MachineLearning_H2O_GBM_Example.OUTPUT_TOPIC, stringDeserializer, stringDeserializer); + assertThat(output).isNotNull(); + return output.value(); + } + + /** Simple test validating only the prediction part of the output + */ + @Test + public void testOne() { + testDriver.pipeInput(recordFactory.create(Kafka_Streams_MachineLearning_H2O_GBM_Example.INPUT_TOPIC, null, "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES", 1L)); + assertThat(getOutput()).isEqualTo("Prediction: Is Airline delayed? 
=> YES"); + } + + /** Test based on Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest + * + */ + @Test + public void testList() { + // Flight data (one single flight) --> We want to predict if it will be + // delayed or not + List inputValues = Arrays.asList( + "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES", + "1999,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES"); + List> records = inputValues.stream().map(v -> new KeyValue(null, v)).collect(Collectors.toList()); + + + testDriver.pipeInput(recordFactory.create(Kafka_Streams_MachineLearning_H2O_GBM_Example.INPUT_TOPIC, records, 1L, 100L)); + assertThat(getOutput()).isEqualTo("Prediction: Is Airline delayed? => YES"); + assertThat(getOutput()).isEqualTo("Prediction: Is Airline delayed? => NO"); + } + + +} From a847585a51df766b4a254ce9396a164300617c14 Mon Sep 17 00:00:00 2001 From: Jukka Karvanen <48978068+jukkakarvanen@users.noreply.github.com> Date: Mon, 1 Apr 2019 10:35:13 +0300 Subject: [PATCH 13/20] Readme and JavaDoc update --- dl4j-deeplearning-iris/readme.md | 4 +-- h20-gbm/readme.md | 28 +++++++++++++++---- ...reams_MachineLearning_H2O_Application.java | 6 +++- ...hineLearning_H2O_DeepLearning_Example.java | 6 +++- ...reams_MachineLearning_H2O_GBM_Example.java | 3 ++ ...Learning_H2O_DeepLearning_ExampleTest.java | 4 +-- ...s_MachineLearning_H2O_GBM_ExampleTest.java | 2 +- tensorflow-image-recognition/readme.md | 5 ++-- tensorflow-keras/readme.md | 4 +-- 9 files changed, 46 insertions(+), 16 deletions(-) diff --git a/dl4j-deeplearning-iris/readme.md b/dl4j-deeplearning-iris/readme.md index f0c9543..61d4ebe 100644 --- a/dl4j-deeplearning-iris/readme.md +++ b/dl4j-deeplearning-iris/readme.md @@ -11,8 +11,8 @@ This is a famous example: Prediction of the Iris Species - implemented with many **Machine Learning Technology** * [DeepLearning4J](https://deeplearning4j.org) * Pretty simple 
example to demo how to build, save and load neural networks with DL4J. [MultiLayerNetwork](https://deeplearning4j.org/doc/org/deeplearning4j/nn/multilayer/MultiLayerNetwork.html) and [INDArray](http://nd4j.org/doc/org/nd4j/linalg/api/ndarray/INDArray.html) are the key APIs to look at if you want to understand the details. -* The model is created via [DeepLearning4J_CSV_Model.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Model.java) and stored in the resources: [DL4J_Iris_Model.zip](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/tree/master/src/main/resources/generatedModels/DL4J). No need to re-train, just for reference. Kudos to Adam Gibson who created this example as part of the DL4J project. +* The model is created via [DeepLearning4J_CSV_Model.java](src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/DeepLearning4J_CSV_Model.java) and stored in the resources: [DL4J_Iris_Model.zip](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/tree/master/src/main/resources/generatedModels/DL4J). No need to re-train, just for reference. Kudos to Adam Gibson who created this example as part of the DL4J project. 
**Unit Test** -[Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java) +[Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_DL4J_DeepLearning_Iris_IntegrationTest.java) diff --git a/h20-gbm/readme.md b/h20-gbm/readme.md index 5f20d9f..1841549 100644 --- a/h20-gbm/readme.md +++ b/h20-gbm/readme.md @@ -13,15 +13,17 @@ A H2O generated GBM Java model (POJO) is instantiated and used in a Kafka Stream * [H2O](https://www.h2o.ai) * Check the [H2O demo](https://github.com/h2oai/h2o-2/wiki/Hacking-Airline-DataSet-with-H2O) to understand the test and and how the model was built -* You can re-use the generated Java model attached to this project ([gbm_pojo_test.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/gbm_pojo_test.java)) or build your own model using R, Python, Flow UI or any other technologies supported by H2O framework. +* You can re-use the generated Java model attached to this project ([gbm_pojo_test.java](src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/gbm_pojo_test.java)) or build your own model using R, Python, Flow UI or any other technologies supported by H2O framework. 
**Source Code** -[MachineLearning_H2O_Example.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java) +[Kafka_Streams_MachineLearning_H2O_GBM_Example.java](src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java) +->Logic in [Kafka_Streams_MachineLearning_H2O_Application.java](src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_Application.java) **Unit Test** -[MachineLearning_H2O_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java) +[Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java) +[Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java) **Manual Testing** @@ -36,7 +38,7 @@ You can easily test this by yourself. Here are the steps: kafka-topics --zookeeper localhost:2181 --create --topic AirlineOutputTopic --partitions 3 --replication-factor 1 - Start the Kafka Streams app: - java -cp target/kafka-streams-machine-learning-examples-1.0-SNAPSHOT-jar-with-dependencies.jar com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_MachineLearning_H2O_GBM_Example + java -cp target/h20-gbm/target/h20-gbm-CP51_AK21-jar-with-dependencies.jar com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_MachineLearning_H2O_GBM_Example - Send messages, e.g. 
with kafkacat: echo -e "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES" | kafkacat -b localhost:9092 -P -t AirlineInputTopic @@ -48,9 +50,25 @@ You can easily test this by yourself. Here are the steps: **H2O Deep Learning instead of H2O GBM Model** -The project includes another example with similar code to use a [H2O Deep Learning model](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java) instead of H2O GBM Model: [Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java) +The project includes another example with similar code to use a [H2O Deep Learning model](src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java) instead of H2O GBM Model: [Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java) This shows how you can easily test or replace different analytic models for one use case, or even use them for A/B testing. 
+**Source Code** + +[Kafka_Streams_MachineLearning_H20_DeepLearning_Example.java](src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H20_DeepLearning_Example.java) +->Logic in [Kafka_Streams_MachineLearning_H2O_Application.java](src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_Application.java) + +**Unit Test** + +[Kafka_Streams_MachineLearning_H20_DeepLearning_ExampleTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H20_DeepLearning_ExampleTest.java) +[Kafka_Streams_MachineLearning_H20_DeepLearning_Example_IntegrationTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H20_DeepLearning_Example_IntegrationTest.java) + + +**Manual Testing** +Same as above but change class to start app: + +- Start the Kafka Streams app: + java -cp target/h20-gbm/target/h20-gbm-CP51_AK21-jar-with-dependencies.jar com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_MachineLearning_H20_DeepLearning_Example diff --git a/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_Application.java b/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_Application.java index 9dd014a..ef9cec6 100644 --- a/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_Application.java +++ b/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_Application.java @@ -18,11 +18,15 @@ * @author Kai Waehner (www.kai-waehner.de) * * Creates a new Kafka Streams application for prediction of flight - * delays The application uses the GBM model "gbm_pojo_test" (built with + * delays The application uses the GBM model (built with * H2O.ai) to infer messages sent to Kafka topic 
"AirlineInputTopic". * The outcome of model inference is sent to Kafka topic * "AirlineOutputTopic". * + * Refactored that all model use same base class, only modelName and applicationId passed in + * Used Kafka_Streams_MachineLearning_H2O_GBM_Example_Refactored class as baseline for this + * @author Jukka Karvanen / jukinimi.com + * */ public class Kafka_Streams_MachineLearning_H2O_Application { diff --git a/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java b/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java index 101dfe0..2da1bfd 100644 --- a/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java +++ b/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java @@ -17,10 +17,14 @@ * @author Kai Waehner (www.kai-waehner.de) * * Creates a new Kafka Streams application for prediction of flight delays - * The application uses the GBM model "gbm_pojo_test" (built with H2O.ai) to infer messages + * The application uses the GBM model "deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451" (built with H2O.ai) to infer messages * sent to Kafka topic "AirlineInputTopic". The outcome of model inference is sent to * Kafka topic "AirlineOutputTopic". 
* + * Main logic now in parent Class + * Refactoring to utilize common @link Kafka_Streams_MachineLearning_H2O_Application class + * @author Jukka Karvanen / jukinimi.com + * */ public class Kafka_Streams_MachineLearning_H2O_DeepLearning_Example extends Kafka_Streams_MachineLearning_H2O_Application { diff --git a/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java b/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java index e903145..e90d9eb 100644 --- a/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java +++ b/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java @@ -23,6 +23,9 @@ * The outcome of model inference is sent to Kafka topic * "AirlineOutputTopic". * + * * Main logic now in parent Class + * * Refactoring to utilize common @link Kafka_Streams_MachineLearning_H2O_Application class + * * @author Jukka Karvanen / jukinimi.com */ public class Kafka_Streams_MachineLearning_H2O_GBM_Example extends Kafka_Streams_MachineLearning_H2O_Application { // Name of the generated H2O model diff --git a/h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java b/h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java index 6ef0e52..80c0ccb 100644 --- a/h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java +++ b/h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java @@ -21,8 +21,8 @@ * * @author Jukka Karvanen / jukinimi.com * * Unit Test of - * {@link 
Kafka_Streams_MachineLearning_H2O_GBM_Example}, using an - * TopologyTestDriver and a TensorFlow CNN model. + * {@link Kafka_Streams_MachineLearning_H2O_DeepLearning_Example}, using an + * TopologyTestDriver and a H20 DeepLearning model. * */ diff --git a/h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java b/h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java index bae28f9..806dba2 100644 --- a/h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java +++ b/h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java @@ -23,7 +23,7 @@ * @author Jukka Karvanen / jukinimi.com * * Unit Test of * {@link Kafka_Streams_MachineLearning_H2O_GBM_Example}, using an - * TopologyTestDriver and a TensorFlow CNN model. + * TopologyTestDriver and a H20 GBM model. 
* */ diff --git a/tensorflow-image-recognition/readme.md b/tensorflow-image-recognition/readme.md index 8219e92..ee8b09f 100644 --- a/tensorflow-image-recognition/readme.md +++ b/tensorflow-image-recognition/readme.md @@ -17,9 +17,10 @@ A prebuilt TensorFlow CNN model is instantiated and used in a Kafka Streams appl **Source Code** -[Kafka_Streams_TensorFlow_Image_Recognition_Example.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_Example.java) +[Kafka_Streams_TensorFlow_Image_Recognition_Example.java](src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_Example.java) **Unit Test** -[Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java) +[Kafka_Streams_TensorFlow_Image_Recognition_ExampleTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_TensorFlow_Image_Recognition_ExampleTest.java) +[Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest.java) diff --git a/tensorflow-keras/readme.md b/tensorflow-keras/readme.md index a3d725f..0c326d3 100644 --- a/tensorflow-keras/readme.md +++ b/tensorflow-keras/readme.md @@ -14,11 +14,11 @@ Development of an analytic model trained with Python, Keras and TensorFlow and d * [Keras](https://keras.io/) - a high-level neural networks API, written in Python and capable of running on top of TensorFlow, CNTK, or Theano. 
* [TensorFlow](https://www.tensorflow.org/) - used as backend under the hood of Keras * DeepLearning4J 's [KerasModelImport feature](https://deeplearning4j.org/docs/latest/keras-import-overview) is used for importing the Keras / TensorFlow model into Java. The used model is its 'Hello World' model example. -* The Keras model was trained with this [Python script](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/main/resources/generatedModels/Keras/keras-model-script.py). +* The Keras model was trained with this [Python script](src/main/resources/generatedModels/Keras/keras-model-script.py). **Unit Test** -[Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java](https://github.com/kaiwaehner/kafka-streams-machine-learning-examples/blob/master/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java) +[Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_TensorFlow_Keras_Example_IntegrationTest.java) From e691b6a28517712852891fc8964bfb30f5e458cb Mon Sep 17 00:00:00 2001 From: Jukka Karvanen <48978068+jukkakarvanen@users.noreply.github.com> Date: Mon, 1 Apr 2019 10:51:24 +0300 Subject: [PATCH 14/20] Typo in h2o --- h20-gbm/readme.md | 11 ++++++----- ..._MachineLearning_H2O_DeepLearning_ExampleTest.java | 2 +- ...a_Streams_MachineLearning_H2O_GBM_ExampleTest.java | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/h20-gbm/readme.md b/h20-gbm/readme.md index 1841549..acafc71 100644 --- a/h20-gbm/readme.md +++ b/h20-gbm/readme.md @@ -38,7 +38,7 @@ You can easily test this by yourself. 
Here are the steps: kafka-topics --zookeeper localhost:2181 --create --topic AirlineOutputTopic --partitions 3 --replication-factor 1 - Start the Kafka Streams app: - java -cp target/h20-gbm/target/h20-gbm-CP51_AK21-jar-with-dependencies.jar com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_MachineLearning_H2O_GBM_Example + java -cp target/h20-gbm-CP51_AK21-jar-with-dependencies.jar com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_MachineLearning_H2O_GBM_Example - Send messages, e.g. with kafkacat: echo -e "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES" | kafkacat -b localhost:9092 -P -t AirlineInputTopic @@ -55,20 +55,21 @@ This shows how you can easily test or replace different analytic models for one **Source Code** -[Kafka_Streams_MachineLearning_H20_DeepLearning_Example.java](src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H20_DeepLearning_Example.java) +[Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java](src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java) ->Logic in [Kafka_Streams_MachineLearning_H2O_Application.java](src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_Application.java) **Unit Test** -[Kafka_Streams_MachineLearning_H20_DeepLearning_ExampleTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H20_DeepLearning_ExampleTest.java) -[Kafka_Streams_MachineLearning_H20_DeepLearning_Example_IntegrationTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H20_DeepLearning_Example_IntegrationTest.java) 
+[Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java) +[Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java) **Manual Testing** + Same as above but change class to start app: - Start the Kafka Streams app: - java -cp target/h20-gbm/target/h20-gbm-CP51_AK21-jar-with-dependencies.jar com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_MachineLearning_H20_DeepLearning_Example + java -cp target/h20-gbm-CP51_AK21-jar-with-dependencies.jar com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_MachineLearning_H2O_DeepLearning_Example diff --git a/h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java b/h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java index 80c0ccb..07fdc14 100644 --- a/h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java +++ b/h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java @@ -22,7 +22,7 @@ * @author Jukka Karvanen / jukinimi.com * * Unit Test of * {@link Kafka_Streams_MachineLearning_H2O_DeepLearning_Example}, using an - * TopologyTestDriver and a H20 DeepLearning model. + * TopologyTestDriver and a H2O DeepLearning model. 
* */ diff --git a/h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java b/h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java index 806dba2..9c2ad70 100644 --- a/h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java +++ b/h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java @@ -23,7 +23,7 @@ * @author Jukka Karvanen / jukinimi.com * * Unit Test of * {@link Kafka_Streams_MachineLearning_H2O_GBM_Example}, using an - * TopologyTestDriver and a H20 GBM model. + * TopologyTestDriver and a H2O GBM model. * */ From f83a799270a29678d0874cef03d925f2e7039198 Mon Sep 17 00:00:00 2001 From: Jukka Karvanen <48978068+jukkakarvanen@users.noreply.github.com> Date: Mon, 1 Apr 2019 10:56:23 +0300 Subject: [PATCH 15/20] Typo in h2o --- {h20-gbm => h2o-gbm}/pom.xml | 2 +- {h20-gbm => h2o-gbm}/readme.md | 4 ++-- ...fka_Streams_MachineLearning_H2O_Application.java | 0 ...ms_MachineLearning_H2O_DeepLearning_Example.java | 0 ...fka_Streams_MachineLearning_H2O_GBM_Example.java | 0 ...arning_fe7c1f02_08ec_4070_b784_c2531147e451.java | 0 .../machinelearning/models/gbm_pojo_test.java | 0 .../DeepWater_model_python_1503570558230_1.zip | Bin .../GBM_model_python_1503397740678_1.zip | Bin .../src/main/resources/log4j.properties | 0 .../integration/utils/TestEmbeddedKafkaCluster.java | 0 .../streams/integration/utils/TestKafkaStreams.java | 0 ...achineLearning_H2O_DeepLearning_ExampleTest.java | 0 ...Streams_MachineLearning_H2O_GBM_ExampleTest.java | 0 ...ng_H2O_DeepLearning_Example_IntegrationTest.java | 0 ...ineLearning_H2O_GBM_Example_IntegrationTest.java | 0 pom.xml | 2 +- readme.md | 2 +- 18 files changed, 5 insertions(+), 5 deletions(-) rename {h20-gbm => h2o-gbm}/pom.xml (99%) rename {h20-gbm 
=> h2o-gbm}/readme.md (97%) rename {h20-gbm => h2o-gbm}/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_Application.java (100%) rename {h20-gbm => h2o-gbm}/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java (100%) rename {h20-gbm => h2o-gbm}/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java (100%) rename {h20-gbm => h2o-gbm}/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java (100%) rename {h20-gbm => h2o-gbm}/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/gbm_pojo_test.java (100%) rename {h20-gbm => h2o-gbm}/src/main/resources/generatedModels/DeepWater_model_python_1503570558230_1.zip (100%) rename {h20-gbm => h2o-gbm}/src/main/resources/generatedModels/GBM_model_python_1503397740678_1.zip (100%) rename {h20-gbm => h2o-gbm}/src/main/resources/log4j.properties (100%) rename {h20-gbm => h2o-gbm}/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java (100%) rename {h20-gbm => h2o-gbm}/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java (100%) rename {h20-gbm => h2o-gbm}/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java (100%) rename {h20-gbm => h2o-gbm}/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java (100%) rename {h20-gbm => h2o-gbm}/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java (100%) rename {h20-gbm => 
h2o-gbm}/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java (100%) diff --git a/h20-gbm/pom.xml b/h2o-gbm/pom.xml similarity index 99% rename from h20-gbm/pom.xml rename to h2o-gbm/pom.xml index ab7a62f..b7309bc 100644 --- a/h20-gbm/pom.xml +++ b/h2o-gbm/pom.xml @@ -4,7 +4,7 @@ 4.0.0 com.github.kaiwaehner.kafka.streams.machinelearning - h20-gbm + h2o-gbm CP51_AK21 diff --git a/h20-gbm/readme.md b/h2o-gbm/readme.md similarity index 97% rename from h20-gbm/readme.md rename to h2o-gbm/readme.md index acafc71..3e05a7b 100644 --- a/h20-gbm/readme.md +++ b/h2o-gbm/readme.md @@ -38,7 +38,7 @@ You can easily test this by yourself. Here are the steps: kafka-topics --zookeeper localhost:2181 --create --topic AirlineOutputTopic --partitions 3 --replication-factor 1 - Start the Kafka Streams app: - java -cp target/h20-gbm-CP51_AK21-jar-with-dependencies.jar com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_MachineLearning_H2O_GBM_Example + java -cp target/h2o-gbm-CP51_AK21-jar-with-dependencies.jar com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_MachineLearning_H2O_GBM_Example - Send messages, e.g. 
with kafkacat: echo -e "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES" | kafkacat -b localhost:9092 -P -t AirlineInputTopic @@ -69,7 +69,7 @@ This shows how you can easily test or replace different analytic models for one Same as above but change class to start app: - Start the Kafka Streams app: - java -cp target/h20-gbm-CP51_AK21-jar-with-dependencies.jar com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_MachineLearning_H2O_DeepLearning_Example + java -cp target/h2o-gbm-CP51_AK21-jar-with-dependencies.jar com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_MachineLearning_H2O_DeepLearning_Example diff --git a/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_Application.java b/h2o-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_Application.java similarity index 100% rename from h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_Application.java rename to h2o-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_Application.java diff --git a/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java b/h2o-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java similarity index 100% rename from h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java rename to h2o-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java diff --git a/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java 
b/h2o-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java similarity index 100% rename from h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java rename to h2o-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java diff --git a/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java b/h2o-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java similarity index 100% rename from h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java rename to h2o-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java diff --git a/h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/gbm_pojo_test.java b/h2o-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/gbm_pojo_test.java similarity index 100% rename from h20-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/gbm_pojo_test.java rename to h2o-gbm/src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/gbm_pojo_test.java diff --git a/h20-gbm/src/main/resources/generatedModels/DeepWater_model_python_1503570558230_1.zip b/h2o-gbm/src/main/resources/generatedModels/DeepWater_model_python_1503570558230_1.zip similarity index 100% rename from h20-gbm/src/main/resources/generatedModels/DeepWater_model_python_1503570558230_1.zip rename to h2o-gbm/src/main/resources/generatedModels/DeepWater_model_python_1503570558230_1.zip diff --git a/h20-gbm/src/main/resources/generatedModels/GBM_model_python_1503397740678_1.zip 
b/h2o-gbm/src/main/resources/generatedModels/GBM_model_python_1503397740678_1.zip similarity index 100% rename from h20-gbm/src/main/resources/generatedModels/GBM_model_python_1503397740678_1.zip rename to h2o-gbm/src/main/resources/generatedModels/GBM_model_python_1503397740678_1.zip diff --git a/h20-gbm/src/main/resources/log4j.properties b/h2o-gbm/src/main/resources/log4j.properties similarity index 100% rename from h20-gbm/src/main/resources/log4j.properties rename to h2o-gbm/src/main/resources/log4j.properties diff --git a/h20-gbm/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java b/h2o-gbm/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java similarity index 100% rename from h20-gbm/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java rename to h2o-gbm/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java diff --git a/h20-gbm/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java b/h2o-gbm/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java similarity index 100% rename from h20-gbm/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java rename to h2o-gbm/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java diff --git a/h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java b/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java similarity index 100% rename from h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java rename to 
h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java diff --git a/h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java b/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java similarity index 100% rename from h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java rename to h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java diff --git a/h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java b/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java similarity index 100% rename from h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java rename to h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java diff --git a/h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java b/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java similarity index 100% rename from h20-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java rename to 
h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java diff --git a/pom.xml b/pom.xml index 3da61e0..991f1ae 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ - h20-gbm + h2o-gbm tensorflow-image-recognition diff --git a/readme.md b/readme.md index 169216a..91c266c 100644 --- a/readme.md +++ b/readme.md @@ -62,7 +62,7 @@ If you want to run an implementation of a main class in package **com.github.meg ### Example 1 - Gradient Boosting with H2O.ai for Prediction of Flight Delays -Detailed info in [h20-gbm](h20-gbm/readme.md) +Detailed info in [h2o-gbm](h2o-gbm/readme.md) ### Example 2 - Convolutional Neural Network (CNN) with TensorFlow for Image Recognition From 80ef2d8fb5aaa4d9e7bab0760f129dc5f27541e3 Mon Sep 17 00:00:00 2001 From: Jukka Karvanen <48978068+jukkakarvanen@users.noreply.github.com> Date: Mon, 1 Apr 2019 10:59:58 +0300 Subject: [PATCH 16/20] Formatting --- h2o-gbm/readme.md | 1 + 1 file changed, 1 insertion(+) diff --git a/h2o-gbm/readme.md b/h2o-gbm/readme.md index 3e05a7b..cdafd32 100644 --- a/h2o-gbm/readme.md +++ b/h2o-gbm/readme.md @@ -69,6 +69,7 @@ This shows how you can easily test or replace different analytic models for one Same as above but change class to start app: - Start the Kafka Streams app: + java -cp target/h2o-gbm-CP51_AK21-jar-with-dependencies.jar com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_MachineLearning_H2O_DeepLearning_Example From c073203cb65138211bab980bae28f07b4d1ee588 Mon Sep 17 00:00:00 2001 From: Kai Waehner Date: Wed, 3 Apr 2019 10:54:41 +0200 Subject: [PATCH 17/20] Ignore configuration files for Visual Studio Code --- .gitignore | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 108e0cc..1803966 100644 --- a/.gitignore +++ b/.gitignore @@ -13,4 +13,7 @@ # Maven log/ -target/ \ No newline at end of file +target/ + +# Visual Studio Code +.vscode/ From 
d82df6e11c4f184b3425eb626c5747ee74b14eaa Mon Sep 17 00:00:00 2001 From: Kai Waehner Date: Wed, 3 Apr 2019 14:17:27 +0200 Subject: [PATCH 18/20] Moved integration tests into test folder --- ...afka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java | 0 .../Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/{ => test}/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java (100%) rename h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/{ => test}/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java (100%) diff --git a/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java b/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java similarity index 100% rename from h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java rename to h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java diff --git a/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java b/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java similarity index 100% rename from h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java rename to h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java From e8ebcbc2df671d4c5a9fcd4d9704db474d109fe6 Mon Sep 17 00:00:00 2001 From: Kai Waehner Date: Wed, 3 Apr 
2019 15:09:56 +0200 Subject: [PATCH 19/20] Moved all test classes into same folder --- ...Learning_H2O_DeepLearning_ExampleTest.java | 101 ++++++++++++++++++ ..._DeepLearning_Example_IntegrationTest.java | 4 +- ...s_MachineLearning_H2O_GBM_ExampleTest.java | 100 +++++++++++++++++ ...rning_H2O_GBM_Example_IntegrationTest.java | 12 ++- .../TestEmbeddedKafkaCluster.java | 19 ++-- .../machinelearning}/TestKafkaStreams.java | 13 ++- ...Learning_H2O_DeepLearning_ExampleTest.java | 85 --------------- ...s_MachineLearning_H2O_GBM_ExampleTest.java | 85 --------------- 8 files changed, 229 insertions(+), 190 deletions(-) create mode 100644 h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java rename h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/{test => }/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java (98%) create mode 100644 h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java rename h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/{test => }/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java (96%) rename h2o-gbm/src/test/java/com/github/{jukkakarvanen/kafka/streams/integration/utils => megachucky/kafka/streams/machinelearning}/TestEmbeddedKafkaCluster.java (66%) rename h2o-gbm/src/test/java/com/github/{jukkakarvanen/kafka/streams/integration/utils => megachucky/kafka/streams/machinelearning}/TestKafkaStreams.java (74%) delete mode 100644 h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java delete mode 100644 h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java diff --git 
a/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java b/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java new file mode 100644 index 0000000..17e65f3 --- /dev/null +++ b/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java @@ -0,0 +1,101 @@ +package com.github.megachucky.kafka.streams.machinelearning; + +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.common.serialization.StringDeserializer; +import org.apache.kafka.common.serialization.StringSerializer; +import org.apache.kafka.streams.KeyValue; +import org.apache.kafka.streams.TopologyTestDriver; +import org.apache.kafka.streams.test.ConsumerRecordFactory; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +import static org.assertj.core.api.AssertionsForInterfaceTypes.assertThat; + +/** + * TopologyTestDriver based test about stream processing of + * Kafka_Streams_MachineLearning_H2O_DeepLearning_Example. + * + * @author Jukka Karvanen / jukinimi.com * Unit Test of + * {@link Kafka_Streams_MachineLearning_H2O_DeepLearning_Example}, using + * a TopologyTestDriver and an H2O DeepLearning model.
+ * + */ + +public class Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest { + private TopologyTestDriver testDriver; + + private StringDeserializer stringDeserializer = new StringDeserializer(); + private ConsumerRecordFactory recordFactory = new ConsumerRecordFactory<>( + new StringSerializer(), new StringSerializer()); + + @Before + public void setup() throws IllegalAccessException, ClassNotFoundException, InstantiationException { + testDriver = new TopologyTestDriver( + Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.getStreamTopology( + Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.modelClassName), + Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.getStreamConfiguration( + "localhost:9092", + Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.APPLICATION_ID)); + } + + @After + public void tearDown() { + try { + testDriver.close(); + } catch (RuntimeException e) { + // https://issues.apache.org/jira/browse/KAFKA-6647 causes exception when + // executed in Windows, ignoring it + // Logged stacktrace cannot be avoided + System.out.println("Ignoring exception, test failing in Windows due this exception:" + + e.getLocalizedMessage()); + } + } + + private String getOutput() { + ProducerRecord output = testDriver.readOutput( + Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.OUTPUT_TOPIC, stringDeserializer, + stringDeserializer); + assertThat(output).isNotNull(); + return output.value(); + } + + /** + * Simple test validating only the prediction part of the output + */ + @Test + public void testOne() { + testDriver.pipeInput(recordFactory.create( + Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.INPUT_TOPIC, null, + "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES", + 1L)); + assertThat(getOutput()).isEqualTo("Prediction: Is Airline delayed? 
=> YES"); + } + + /** + * Test based on + * Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest + * + */ + @Test + public void testList() { + // Flight data (one single flight) --> We want to predict if it will be + // delayed or not + List inputValues = Arrays.asList( + "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES", + "1999,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES"); + List> records = inputValues.stream() + .map(v -> new KeyValue(null, v)).collect(Collectors.toList()); + + testDriver.pipeInput(recordFactory.create( + Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.INPUT_TOPIC, records, 1L, 100L)); + assertThat(getOutput()).isEqualTo("Prediction: Is Airline delayed? => YES"); + // This model predict also another flight to be delayed + assertThat(getOutput()).isEqualTo("Prediction: Is Airline delayed? => YES"); + } + +} diff --git a/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java b/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java similarity index 98% rename from h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java rename to h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java index 01a07c8..c43e657 100644 --- a/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java +++ b/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java @@ -1,4 +1,4 @@ 
-package com.github.megachucky.kafka.streams.machinelearning.test; +package com.github.megachucky.kafka.streams.machinelearning; import static org.assertj.core.api.Assertions.assertThat; @@ -6,7 +6,7 @@ import java.util.List; import java.util.Properties; -import com.github.jukkakarvanen.kafka.streams.integration.utils.TestEmbeddedKafkaCluster; +import com.github.megachucky.kafka.streams.machinelearning.TestEmbeddedKafkaCluster; import org.apache.kafka.clients.consumer.ConsumerConfig; import org.apache.kafka.clients.producer.ProducerConfig; import org.apache.kafka.common.serialization.Serdes; diff --git a/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java b/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java new file mode 100644 index 0000000..d04f2b5 --- /dev/null +++ b/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java @@ -0,0 +1,100 @@ +package com.github.megachucky.kafka.streams.machinelearning; + +import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.common.serialization.StringDeserializer; +import org.apache.kafka.common.serialization.StringSerializer; +import org.apache.kafka.streams.KeyValue; +import org.apache.kafka.streams.TopologyTestDriver; +import org.apache.kafka.streams.test.ConsumerRecordFactory; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +import static org.assertj.core.api.AssertionsForInterfaceTypes.assertThat; + +/** + * TopologyTestDriver based test about stream processing of + * Kafka_Streams_MachineLearning_H2O_GBM_Example.
+ * + * @author Jukka Karvanen / jukinimi.com * Unit Test of + * {@link Kafka_Streams_MachineLearning_H2O_GBM_Example}, using a + * TopologyTestDriver and an H2O GBM model. + * + */ + +public class Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest { + private TopologyTestDriver testDriver; + + private StringDeserializer stringDeserializer = new StringDeserializer(); + private ConsumerRecordFactory recordFactory = new ConsumerRecordFactory<>( + new StringSerializer(), new StringSerializer()); + + @Before + public void setup() throws IllegalAccessException, ClassNotFoundException, InstantiationException { + testDriver = new TopologyTestDriver( + Kafka_Streams_MachineLearning_H2O_GBM_Example.getStreamTopology( + Kafka_Streams_MachineLearning_H2O_GBM_Example.modelClassName), + Kafka_Streams_MachineLearning_H2O_GBM_Example.getStreamConfiguration("localhost:9092", + Kafka_Streams_MachineLearning_H2O_GBM_Example.APPLICATION_ID)); + } + + @After + public void tearDown() { + try { + testDriver.close(); + } catch (RuntimeException e) { + // https://issues.apache.org/jira/browse/KAFKA-6647 causes exception when + // executed in Windows, ignoring it + // Logged stacktrace cannot be avoided + System.out.println("Ignoring exception, test failing in Windows due this exception:" + + e.getLocalizedMessage()); + } + } + + private String getOutput() { + ProducerRecord output = testDriver.readOutput( + Kafka_Streams_MachineLearning_H2O_GBM_Example.OUTPUT_TOPIC, stringDeserializer, + stringDeserializer); + assertThat(output).isNotNull(); + return output.value(); + } + + /** + * Simple test validating only the prediction part of the output + */ + @Test + public void testOne() { + testDriver.pipeInput(recordFactory.create(Kafka_Streams_MachineLearning_H2O_GBM_Example.INPUT_TOPIC, + null, + "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES", + 1L)); + assertThat(getOutput()).isEqualTo("Prediction: Is Airline delayed?
=> YES"); + } + + /** + * Test based on + * Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest + * + */ + @Test + public void testList() { + // Flight data (one single flight) --> We want to predict if it will be + // delayed or not + List inputValues = Arrays.asList( + "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES", + "1999,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES"); + List> records = inputValues.stream() + .map(v -> new KeyValue(null, v)).collect(Collectors.toList()); + + testDriver.pipeInput(recordFactory.create(Kafka_Streams_MachineLearning_H2O_GBM_Example.INPUT_TOPIC, + records, 1L, 100L)); + assertThat(getOutput()).isEqualTo("Prediction: Is Airline delayed? => YES"); + assertThat(getOutput()).isEqualTo("Prediction: Is Airline delayed? => NO"); + } + +} diff --git a/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java b/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java similarity index 96% rename from h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java rename to h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java index 5e23ee6..690d96d 100644 --- a/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java +++ b/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java @@ -1,4 +1,4 @@ -package com.github.megachucky.kafka.streams.machinelearning.test; +package 
com.github.megachucky.kafka.streams.machinelearning; import static org.assertj.core.api.Assertions.assertThat; @@ -6,7 +6,7 @@ import java.util.List; import java.util.Properties; -import com.github.jukkakarvanen.kafka.streams.integration.utils.TestEmbeddedKafkaCluster; +import com.github.megachucky.kafka.streams.machinelearning.TestEmbeddedKafkaCluster; import org.apache.kafka.clients.consumer.ConsumerConfig; import org.apache.kafka.clients.producer.ProducerConfig; import org.apache.kafka.common.serialization.Serdes; @@ -47,8 +47,9 @@ public class Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest { @ClassRule -// public static final EmbeddedSingleNodeKafkaCluster CLUSTER = new EmbeddedSingleNodeKafkaCluster(); - + // public static final EmbeddedSingleNodeKafkaCluster CLUSTER = new + // EmbeddedSingleNodeKafkaCluster(); + public static final EmbeddedKafkaCluster CLUSTER = new TestEmbeddedKafkaCluster(1); private static final String inputTopic = "AirlineInputTopic"; @@ -184,7 +185,8 @@ public void shouldPredictFlightDelay() throws Exception { producerConfig.put(ProducerConfig.RETRIES_CONFIG, 0); producerConfig.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class); producerConfig.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class); - IntegrationTestUtils.produceValuesSynchronously(inputTopic, inputValues, producerConfig, new MockTime()); + IntegrationTestUtils.produceValuesSynchronously(inputTopic, inputValues, producerConfig, + new MockTime()); // // Step 3: Verify the application's output data. 
diff --git a/h2o-gbm/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java b/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/TestEmbeddedKafkaCluster.java similarity index 66% rename from h2o-gbm/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java rename to h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/TestEmbeddedKafkaCluster.java index d3c7b18..d628bad 100644 --- a/h2o-gbm/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestEmbeddedKafkaCluster.java +++ b/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/TestEmbeddedKafkaCluster.java @@ -1,4 +1,4 @@ -package com.github.jukkakarvanen.kafka.streams.integration.utils; +package com.github.megachucky.kafka.streams.machinelearning; import org.apache.kafka.streams.integration.utils.EmbeddedKafkaCluster; import org.slf4j.Logger; @@ -6,15 +6,17 @@ import java.util.Properties; -/** This is helper class to workaround for Failing stream tests in Windows environment KAFKA-6647. +/** + * This is helper class to workaround for Failing stream tests in Windows + * environment KAFKA-6647. 
* * @author Jukka Karvanen * - * The causing issue is https://issues.apache.org/jira/browse/KAFKA-6647 - * Replacing EmbeddedKafkaCluster with TestEmbeddedKafkaCluster will catch and ignore the exception - * happening during the tear down of the test - * The exception does not have affect to functionality - */ + * The causing issue is https://issues.apache.org/jira/browse/KAFKA-6647 + * Replacing EmbeddedKafkaCluster with TestEmbeddedKafkaCluster will + * catch and ignore the exception happening during the tear down of the + * test. The exception does not affect functionality. + */ public class TestEmbeddedKafkaCluster extends EmbeddedKafkaCluster { private static final Logger log = LoggerFactory.getLogger(TestEmbeddedKafkaCluster.class); @@ -31,7 +33,8 @@ public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig, long mo super(numBrokers, brokerConfig, mockTimeMillisStart); } - public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig, long mockTimeMillisStart, long mockTimeNanoStart) { + public TestEmbeddedKafkaCluster(int numBrokers, Properties brokerConfig, long mockTimeMillisStart, + long mockTimeNanoStart) { super(numBrokers, brokerConfig, mockTimeMillisStart, mockTimeNanoStart); } diff --git a/h2o-gbm/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java b/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/TestKafkaStreams.java similarity index 74% rename from h2o-gbm/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java rename to h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/TestKafkaStreams.java index ef2bc4e..1b2f366 100644 --- a/h2o-gbm/src/test/java/com/github/jukkakarvanen/kafka/streams/integration/utils/TestKafkaStreams.java +++ b/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/TestKafkaStreams.java @@ -1,4 +1,4 @@ -package
com.github.jukkakarvanen.kafka.streams.integration.utils; +package com.github.megachucky.kafka.streams.machinelearning; import org.apache.kafka.common.utils.Time; import org.apache.kafka.streams.KafkaClientSupplier; @@ -9,13 +9,16 @@ import java.util.Properties; -/** This is helper class to workaround for Failing stream tests in Windows environment KAFKA-6647. +/** + * This is a helper class to work around failing stream tests in Windows + * environments (KAFKA-6647). * * @author Jukka Karvanen * - * The causing issue is https://issues.apache.org/jira/browse/KAFKA-6647 - * Replacing KafkaStreams with TestKafkaStreams will catch and ignore the exception caused by cleanUp - * The exception does not have affect to functionality + * The causing issue is https://issues.apache.org/jira/browse/KAFKA-6647 + * Replacing KafkaStreams with TestKafkaStreams will catch and ignore + * the exception caused by cleanUp. The exception does not affect + * functionality. */ public class TestKafkaStreams extends KafkaStreams { diff --git a/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java b/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java deleted file mode 100644 index 07fdc14..0000000 --- a/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java +++ /dev/null @@ -1,85 +0,0 @@ -package com.github.megachucky.kafka.streams.machinelearning; - -import org.apache.kafka.clients.producer.ProducerRecord; -import org.apache.kafka.common.serialization.StringDeserializer; -import org.apache.kafka.common.serialization.StringSerializer; -import org.apache.kafka.streams.KeyValue; -import org.apache.kafka.streams.TopologyTestDriver; -import org.apache.kafka.streams.test.ConsumerRecordFactory; -import org.junit.After; -import
org.junit.Before; -import org.junit.Test; - -import java.util.Arrays; -import java.util.List; -import java.util.stream.Collectors; - -import static org.assertj.core.api.AssertionsForInterfaceTypes.assertThat; - -/** - * TopologyTestDriver based test about stream processing of Kafka_Streams_TensorFlow_Image_Recognition_Example. - * - * @author Jukka Karvanen / jukinimi.com - * * Unit Test of - * {@link Kafka_Streams_MachineLearning_H2O_DeepLearning_Example}, using an - * TopologyTestDriver and a H2O DeepLearning model. - * - */ - -public class Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest { - private TopologyTestDriver testDriver; - - private StringDeserializer stringDeserializer = new StringDeserializer(); - private ConsumerRecordFactory recordFactory = new ConsumerRecordFactory<>(new StringSerializer(), new StringSerializer()); - - @Before - public void setup() throws IllegalAccessException, ClassNotFoundException, InstantiationException { - testDriver = new TopologyTestDriver(Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.getStreamTopology(Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.modelClassName), Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.getStreamConfiguration("localhost:9092",Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.APPLICATION_ID)); - } - - @After - public void tearDown() { - try { - testDriver.close(); - } catch (RuntimeException e) { - // https://issues.apache.org/jira/browse/KAFKA-6647 causes exception when executed in Windows, ignoring it - // Logged stacktrace cannot be avoided - System.out.println("Ignoring exception, test failing in Windows due this exception:" + e.getLocalizedMessage()); - } - } - - private String getOutput() { - ProducerRecord output = testDriver.readOutput(Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.OUTPUT_TOPIC, stringDeserializer, stringDeserializer); - assertThat(output).isNotNull(); - return output.value(); - } - - /** Simple test validating only the 
prediction part of the output - */ - @Test - public void testOne() { - testDriver.pipeInput(recordFactory.create(Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.INPUT_TOPIC, null, "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES", 1L)); - assertThat(getOutput()).isEqualTo("Prediction: Is Airline delayed? => YES"); - } - - /** Test based on Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest - * - */ - @Test - public void testList() { - // Flight data (one single flight) --> We want to predict if it will be - // delayed or not - List inputValues = Arrays.asList( - "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES", - "1999,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES"); - List> records = inputValues.stream().map(v -> new KeyValue(null, v)).collect(Collectors.toList()); - - - testDriver.pipeInput(recordFactory.create(Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.INPUT_TOPIC, records, 1L, 100L)); - assertThat(getOutput()).isEqualTo("Prediction: Is Airline delayed? => YES"); - //This model predict also another flight to be delayed - assertThat(getOutput()).isEqualTo("Prediction: Is Airline delayed? 
=> YES"); - } - - -} diff --git a/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java b/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java deleted file mode 100644 index 9c2ad70..0000000 --- a/h2o-gbm/src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java +++ /dev/null @@ -1,85 +0,0 @@ -package com.github.megachucky.kafka.streams.machinelearning; - -import org.apache.kafka.clients.producer.ProducerRecord; -import org.apache.kafka.common.serialization.StringDeserializer; -import org.apache.kafka.common.serialization.StringSerializer; -import org.apache.kafka.streams.KeyValue; -import org.apache.kafka.streams.TopologyTestDriver; -import org.apache.kafka.streams.test.ConsumerRecordFactory; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - -import java.io.IOException; -import java.util.Arrays; -import java.util.List; -import java.util.stream.Collectors; - -import static org.assertj.core.api.AssertionsForInterfaceTypes.assertThat; - -/** - * TopologyTestDriver based test about stream processing of Kafka_Streams_TensorFlow_Image_Recognition_Example. - * - * @author Jukka Karvanen / jukinimi.com - * * Unit Test of - * {@link Kafka_Streams_MachineLearning_H2O_GBM_Example}, using an - * TopologyTestDriver and a H2O GBM model. 
- * - */ - -public class Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest { - private TopologyTestDriver testDriver; - - private StringDeserializer stringDeserializer = new StringDeserializer(); - private ConsumerRecordFactory recordFactory = new ConsumerRecordFactory<>(new StringSerializer(), new StringSerializer()); - - @Before - public void setup() throws IllegalAccessException, ClassNotFoundException, InstantiationException { - testDriver = new TopologyTestDriver(Kafka_Streams_MachineLearning_H2O_GBM_Example.getStreamTopology(Kafka_Streams_MachineLearning_H2O_GBM_Example.modelClassName), Kafka_Streams_MachineLearning_H2O_GBM_Example.getStreamConfiguration("localhost:9092",Kafka_Streams_MachineLearning_H2O_GBM_Example.APPLICATION_ID)); - } - - @After - public void tearDown() { - try { - testDriver.close(); - } catch (RuntimeException e) { - // https://issues.apache.org/jira/browse/KAFKA-6647 causes exception when executed in Windows, ignoring it - // Logged stacktrace cannot be avoided - System.out.println("Ignoring exception, test failing in Windows due this exception:" + e.getLocalizedMessage()); - } - } - - private String getOutput() { - ProducerRecord output = testDriver.readOutput(Kafka_Streams_MachineLearning_H2O_GBM_Example.OUTPUT_TOPIC, stringDeserializer, stringDeserializer); - assertThat(output).isNotNull(); - return output.value(); - } - - /** Simple test validating only the prediction part of the output - */ - @Test - public void testOne() { - testDriver.pipeInput(recordFactory.create(Kafka_Streams_MachineLearning_H2O_GBM_Example.INPUT_TOPIC, null, "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES", 1L)); - assertThat(getOutput()).isEqualTo("Prediction: Is Airline delayed? 
=> YES"); - } - - /** Test based on Kafka_Streams_TensorFlow_Image_Recognition_Example_IntegrationTest - * - */ - @Test - public void testList() { - // Flight data (one single flight) --> We want to predict if it will be - // delayed or not - List inputValues = Arrays.asList( - "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES", - "1999,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES"); - List> records = inputValues.stream().map(v -> new KeyValue(null, v)).collect(Collectors.toList()); - - - testDriver.pipeInput(recordFactory.create(Kafka_Streams_MachineLearning_H2O_GBM_Example.INPUT_TOPIC, records, 1L, 100L)); - assertThat(getOutput()).isEqualTo("Prediction: Is Airline delayed? => YES"); - assertThat(getOutput()).isEqualTo("Prediction: Is Airline delayed? => NO"); - } - - -} From 1179c72f1265460cbd301cb38c11588524ff540b Mon Sep 17 00:00:00 2001 From: Kai Waehner Date: Wed, 3 Apr 2019 15:59:20 +0200 Subject: [PATCH 20/20] More detailed docs + fix of manual test description --- h2o-gbm/readme.md | 63 ++++++++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 28 deletions(-) diff --git a/h2o-gbm/readme.md b/h2o-gbm/readme.md index cdafd32..448f92b 100644 --- a/h2o-gbm/readme.md +++ b/h2o-gbm/readme.md @@ -2,76 +2,83 @@ General info in main [Readme](../readme.md) -### Example 1 - Gradient Boosting with H2O.ai for Prediction of Flight Delays +## Example 1 - Gradient Boosting with H2O.ai for Prediction of Flight Delays -**Use Case** +### Use Case Gradient Boosting Method (GBM) to predict flight delays. A H2O generated GBM Java model (POJO) is instantiated and used in a Kafka Streams application to do interference on new events. 
-**Machine Learning Technology** +### Machine Learning Technology * [H2O](https://www.h2o.ai) * Check the [H2O demo](https://github.com/h2oai/h2o-2/wiki/Hacking-Airline-DataSet-with-H2O) to understand the test and and how the model was built * You can re-use the generated Java model attached to this project ([gbm_pojo_test.java](src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/gbm_pojo_test.java)) or build your own model using R, Python, Flow UI or any other technologies supported by H2O framework. -**Source Code** +### Source Code +Business Logic (applying the analytic model to do the prediction): +[Kafka_Streams_MachineLearning_H2O_Application.java](src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_Application.java) + +Specification of the used model: [Kafka_Streams_MachineLearning_H2O_GBM_Example.java](src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example.java) -->Logic in [Kafka_Streams_MachineLearning_H2O_Application.java](src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_Application.java) -**Unit Test** +### Automated Tests +Unit Test using TopologyTestDriver: [Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_ExampleTest.java) -[Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java) -**Manual Testing** +Integration Test using EmbeddedKafkaCluster: +[Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_GBM_Example_IntegrationTest.java) + +### Manual Testing You can easily test this by yourself. 
Here are the steps: -- Start Kafka, e.g. with Confluent CLI: + +* Start Kafka, e.g. with Confluent CLI: confluent start kafka -- Create topics AirlineInputTopic and AirlineOutputTopic +* Create topics AirlineInputTopic and AirlineOutputTopic kafka-topics --zookeeper localhost:2181 --create --topic AirlineInputTopic --partitions 3 --replication-factor 1 kafka-topics --zookeeper localhost:2181 --create --topic AirlineOutputTopic --partitions 3 --replication-factor 1 -- Start the Kafka Streams app: +* Start the Kafka Streams app: - java -cp target/h2o-gbm-CP51_AK21-jar-with-dependencies.jar com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_MachineLearning_H2O_GBM_Example -- Send messages, e.g. with kafkacat: + java -cp h2o-gbm/target/h2o-gbm-CP51_AK21-jar-with-dependencies.jar com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_MachineLearning_H2O_GBM_Example +* Send messages, e.g. with kafkacat: echo -e "1987,10,14,3,741,730,912,849,PS,1451,NA,91,79,NA,23,11,SAN,SFO,447,NA,NA,0,NA,0,NA,NA,NA,NA,NA,YES,YES" | kafkacat -b localhost:9092 -P -t AirlineInputTopic -- Consume predictions: +* Consume predictions: kafka-console-consumer --bootstrap-server localhost:9092 --topic AirlineOutputTopic --from-beginning -- Find more details in the unit test... +* Find more details in the unit test... 
- -**H2O Deep Learning instead of H2O GBM Model** +## H2O Deep Learning instead of H2O GBM Model The project includes another example with similar code to use a [H2O Deep Learning model](src/main/java/com/github/megachucky/kafka/streams/machinelearning/models/deeplearning_fe7c1f02_08ec_4070_b784_c2531147e451.java) instead of H2O GBM Model: [Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java) This shows how you can easily test or replace different analytic models for one use case, or even use them for A/B testing. -**Source Code** +### Source Code + +Business Logic (applying the analytic model to do the prediction): +[Kafka_Streams_MachineLearning_H2O_Application.java](src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_Application.java) +Specification of the used model: [Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java](src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example.java) -->Logic in [Kafka_Streams_MachineLearning_H2O_Application.java](src/main/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_Application.java) -**Unit Test** +### Automated Tests +Unit Test using TopologyTestDriver: [Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_ExampleTest.java) -[Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/test/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java) +Integration Test using EmbeddedKafkaCluster: 
+[Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java](src/test/java/com/github/megachucky/kafka/streams/machinelearning/Kafka_Streams_MachineLearning_H2O_DeepLearning_Example_IntegrationTest.java) -**Manual Testing** +### Manual Testing Same as above but change class to start app: -- Start the Kafka Streams app: - - java -cp target/h2o-gbm-CP51_AK21-jar-with-dependencies.jar com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_MachineLearning_H2O_DeepLearning_Example - - - +* Start the Kafka Streams app: + java -cp h2o-gbm/target/h2o-gbm-CP51_AK21-jar-with-dependencies.jar com.github.megachucky.kafka.streams.machinelearning.Kafka_Streams_MachineLearning_H2O_DeepLearning_Example \ No newline at end of file