diff --git a/example/package.mill b/example/package.mill index 6eb137081d2..437c68f5adc 100644 --- a/example/package.mill +++ b/example/package.mill @@ -269,7 +269,7 @@ $txt if (seenCode) "" else { val exampleDashed = examplePath.segments.mkString("-") - val download = + val download = s"{mill-download-url}/mill-dist-${build.millVersion()}-$exampleDashed.zip[download]" val browse = s"{mill-example-url}/$examplePath[browse]" s".build.mill ($download, $browse)" diff --git a/example/scalalib/spark/2-hello-pyspark/build.mill b/example/scalalib/spark/2-hello-pyspark/build.mill index c0a189f142d..5dfde6d9ae8 100644 --- a/example/scalalib/spark/2-hello-pyspark/build.mill +++ b/example/scalalib/spark/2-hello-pyspark/build.mill @@ -10,6 +10,8 @@ object foo extends PythonModule { } +// This example demonstrates running PySpark using Mill. + /** Usage > ./mill foo.run diff --git a/example/scalalib/spark/3-spark-streaming/build.mill b/example/scalalib/spark/3-spark-streaming/build.mill new file mode 100644 index 00000000000..9bd8574de81 --- /dev/null +++ b/example/scalalib/spark/3-spark-streaming/build.mill @@ -0,0 +1,132 @@ +package build +import mill._, scalalib._ + +object foo extends ScalaModule { + def scalaVersion = "2.12.15" + def mvnDeps = Seq( + mvn"org.apache.spark::spark-core:3.5.4", + mvn"org.apache.spark::spark-sql:3.5.4", + mvn"org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.4" + ) + + def forkArgs = Seq("--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED") + + def forkEnv: T[Map[String, String]] = Map("KAFKA_SERVER" -> "localhost:9092") + + def createTopic(args: mill.define.Args) = Task.Command { + val topic = args.value.mkString(" ") + val server = forkEnv()("KAFKA_SERVER") + os.call( + Seq( + "docker-compose", + "exec", + "kafka", + "kafka-topics", + "--create", + "--topic", + topic, + "--bootstrap-server", + server + ), + stdout = os.Inherit + ) + } + + def listTopics: T[Unit] = Task { + val server = forkEnv()("KAFKA_SERVER") + os.call( + Seq( + "docker-compose", + "exec", + "kafka", + "kafka-topics", + "--list", + "--bootstrap-server", + server + ), + stdout = os.Inherit + ) + } + + def describeTopic(args: mill.define.Args) = Task.Command { + val topic = args.value.mkString(" ") + val server = forkEnv()("KAFKA_SERVER") + os.call( + Seq( + "docker-compose", + "exec", + "kafka", + "kafka-topics", + "--describe", + "--topic", + topic, + "--bootstrap-server", + server + ), + stdout = os.Inherit + ) + } + + def readAllMessages(args: mill.define.Args) = Task.Command { + val topic = args.value.mkString(" ") + val server = forkEnv()("KAFKA_SERVER") + os.call( + Seq( + "docker-compose", + "exec", + "kafka", + "kafka-console-consumer", + "--bootstrap-server", + server, + "--topic", + topic, + "--from-beginning", + "--timeout-ms", + "1000" + ), + stdout = os.Inherit + ) + } + +} + +object producer extends ScalaModule { + def scalaVersion = "2.12.15" + def mvnDeps = Seq( + mvn"org.apache.kafka:kafka-clients:3.7.0" + ) +} + +// This example demonstrates how to run a Spark Streaming application using Mill. +// It showcases how Mill can be used to manage a data source like Kafka by defining +// custom Mill tasks to execute administrative Kafka commands. This allows for intuitive, +// scriptable management of Kafka directly through Mill. + +/** Usage + +> docker-compose up -d + +> ./mill foo.createTopic "test-topic" # Custom mill task to create topic. +... +Created topic test-topic. + +> ./mill foo.listTopics # Custom mill task to list all topics. +... 
+test-topic + +> ./mill foo.describeTopic "test-topic" # Custom mill task to describe a topic. +... +Topic: test-topic... + +> ./mill producer.run # Send message + +> ./mill foo.run # Consume message with spark. +... ++-------------+ +| message| ++-------------+ +|Hello, World!| ++-------------+ + +> docker-compose down -v --remove-orphans +*/ diff --git a/example/scalalib/spark/3-spark-streaming/docker-compose.yml b/example/scalalib/spark/3-spark-streaming/docker-compose.yml new file mode 100644 index 00000000000..7739db11b20 --- /dev/null +++ b/example/scalalib/spark/3-spark-streaming/docker-compose.yml @@ -0,0 +1,22 @@ +version: '3.8' + +services: + zookeeper: + image: confluentinc/cp-zookeeper:7.5.0 + ports: + - "2181:2181" + environment: + ZOOKEEPER_CLIENT_PORT: 2181 + ZOOKEEPER_TICK_TIME: 2000 + + kafka: + image: confluentinc/cp-kafka:7.5.0 + ports: + - "9092:9092" + environment: + KAFKA_BROKER_ID: 1 + KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 + KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://localhost:9092 + KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 + depends_on: + - zookeeper \ No newline at end of file diff --git a/example/scalalib/spark/3-spark-streaming/foo/src/foo/Foo.scala b/example/scalalib/spark/3-spark-streaming/foo/src/foo/Foo.scala new file mode 100644 index 00000000000..c7b772b16a5 --- /dev/null +++ b/example/scalalib/spark/3-spark-streaming/foo/src/foo/Foo.scala @@ -0,0 +1,33 @@ +package foo + +import org.apache.spark.sql.{DataFrame, SparkSession} +import org.apache.spark.sql.functions._ + +object Foo { + + def streamFromKafka(spark: SparkSession): DataFrame = { + spark.readStream + .format("kafka") + .option("kafka.bootstrap.servers", sys.env("KAFKA_SERVER")) + .option("subscribe", "test-topic") + .option("startingOffsets", "earliest") + .load() + .selectExpr("CAST(value AS STRING)").toDF("message") + } + + def main(args: Array[String]): Unit = { + val spark = SparkSession.builder() + .appName("HelloWorldKafka") + .master("local[*]") + .getOrCreate() + + val df = streamFromKafka(spark) + + val query = df.writeStream + .format("console") + .outputMode("append") + .start() + + query.awaitTermination(9000) + } +} diff --git a/example/scalalib/spark/3-spark-streaming/producer/src/producer/Producer.scala b/example/scalalib/spark/3-spark-streaming/producer/src/producer/Producer.scala new file mode 100644 index 00000000000..e995e222bd9 --- /dev/null +++ b/example/scalalib/spark/3-spark-streaming/producer/src/producer/Producer.scala @@ -0,0 +1,13 @@ +import org.apache.kafka.clients.producer._ + +object Producer { + def main(args: Array[String]): Unit = { + val props = new java.util.Properties() + props.put("bootstrap.servers", "localhost:9092") + props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer") + props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer") + val producer = new KafkaProducer[String, String](props) + producer.send(new ProducerRecord[String, String]("test-topic", "Hello, World!")) + producer.close() + } +} diff --git a/example/scalalib/spark/4-hello-delta/build.mill b/example/scalalib/spark/4-hello-delta/build.mill new file mode 100644 index 00000000000..8cbe4574c52 --- /dev/null +++ b/example/scalalib/spark/4-hello-delta/build.mill @@ -0,0 +1,38 @@ +package build +import mill._, scalalib._ + +object foo extends ScalaModule { + def scalaVersion = "2.12.15" + def mvnDeps = Seq( + mvn"org.apache.spark::spark-core:3.4.0", + mvn"org.apache.spark::spark-sql:3.4.0", + mvn"io.delta:delta-core_2.12:2.4.0" + ) + + def 
forkArgs = Seq("--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED") + + object test extends ScalaTests { + def mvnDeps = Seq(mvn"com.lihaoyi::utest:0.8.5") + def testFramework = "utest.runner.Framework" + + def forkArgs = Seq("--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED") + } + +} + +// This example demonstrates running a Spark application with Delta Lake as the storage layer, using Mill. + +/** Usage + +> ./mill foo.run ++-------------+ +| message| ++-------------+ +|Hello, Delta!| ++-------------+ + +> ./mill foo.test +... ++ foo.FooTests.Delta table should contain one row with 'Hello, Delta!'... +... +*/ diff --git a/example/scalalib/spark/4-hello-delta/foo/src/foo/Foo.scala b/example/scalalib/spark/4-hello-delta/foo/src/foo/Foo.scala new file mode 100644 index 00000000000..7b4b05cbacd --- /dev/null +++ b/example/scalalib/spark/4-hello-delta/foo/src/foo/Foo.scala @@ -0,0 +1,33 @@ +package foo + +import org.apache.spark.sql.{DataFrame, SparkSession} +import io.delta.tables._ + +object Foo { + + def writeDeltaTable(spark: SparkSession, path: String): Unit = { + import spark.implicits._ + val data = Seq("Hello, Delta!").toDF("message") + data.write.format("delta").mode("overwrite").save(path) + } + + def readDeltaTable(spark: SparkSession, path: String): DataFrame = { + spark.read.format("delta").load(path) + } + + def main(args: Array[String]): Unit = { + val spark = SparkSession.builder() + .appName("HelloDelta") + .master("local[*]") + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") + .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") + .getOrCreate() + + val deltaPath = "tmp/delta-table" + + writeDeltaTable(spark, deltaPath) + readDeltaTable(spark, deltaPath).show() + + spark.stop() + } +} diff --git a/example/scalalib/spark/4-hello-delta/foo/test/src/FooTests.scala b/example/scalalib/spark/4-hello-delta/foo/test/src/FooTests.scala new file mode 100644 index 00000000000..b9b76fafb36 --- /dev/null +++ b/example/scalalib/spark/4-hello-delta/foo/test/src/FooTests.scala @@ -0,0 +1,29 @@ +package foo + +import org.apache.spark.sql.SparkSession +import utest._ + +object FooTests extends TestSuite { + def tests = Tests { + test("Delta table should contain one row with 'Hello, Delta!'") { + val spark = SparkSession.builder() + .appName("FooDeltaTest") + .master("local[*]") + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") + .config( + "spark.sql.catalog.spark_catalog", + "org.apache.spark.sql.delta.catalog.DeltaCatalog" + ) + .getOrCreate() + + val path = "tmp/test-delta-table" + Foo.writeDeltaTable(spark, path) + val df = Foo.readDeltaTable(spark, path) + + val messages = df.collect().map(_.getString(0)).toList + assert(messages == List("Hello, Delta!")) + + spark.stop() + } + } +} diff --git a/example/scalalib/spark/5-hello-iceberg/build.mill b/example/scalalib/spark/5-hello-iceberg/build.mill new file mode 100644 index 00000000000..a430085d439 --- /dev/null +++ b/example/scalalib/spark/5-hello-iceberg/build.mill @@ -0,0 +1,44 @@ +package build +import mill._, scalalib._ + +object foo extends ScalaModule { + def scalaVersion = "2.12.15" + def mvnDeps = Seq( + mvn"org.apache.spark::spark-core:3.5.4", + mvn"org.apache.spark::spark-sql:3.5.4", + mvn"org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.4.3" + ) + + def forkArgs = Seq( + "--add-opens=java.base/java.nio=ALL-UNNAMED", + "--add-exports=java.base/sun.nio.ch=ALL-UNNAMED" + ) + + object test extends ScalaTests { + def mvnDeps = 
Seq(mvn"com.lihaoyi::utest:0.8.5") + def testFramework = "utest.runner.Framework" + + def forkArgs = Seq( + "--add-opens=java.base/java.nio=ALL-UNNAMED", + "--add-exports=java.base/sun.nio.ch=ALL-UNNAMED" + ) + } + +} + +// This example demonstrates running a Spark application with Iceberg as the storage layer, using Mill. + +/** Usage + +> ./mill foo.run ++---------------+ +| message| ++---------------+ +|Hello, Iceberg!| ++---------------+ + +> ./mill foo.test +... ++ foo.FooTests.Iceberg table should contain one row with 'Hello, Iceberg!'... +... +*/ diff --git a/example/scalalib/spark/5-hello-iceberg/foo/src/foo/Foo.scala b/example/scalalib/spark/5-hello-iceberg/foo/src/foo/Foo.scala new file mode 100644 index 00000000000..1d705d13328 --- /dev/null +++ b/example/scalalib/spark/5-hello-iceberg/foo/src/foo/Foo.scala @@ -0,0 +1,34 @@ +package foo + +import org.apache.spark.sql.{DataFrame, SparkSession} +import java.nio.file.{Files, Paths} + +object Foo { + + def writeIcebergTable(spark: SparkSession, tableName: String): Unit = { + import spark.implicits._ + val data = Seq("Hello, Iceberg!").toDF("message") + data.writeTo(tableName).using("iceberg").createOrReplace() + } + + def readIcebergTable(spark: SparkSession, tableName: String): DataFrame = { + spark.read.table(tableName) + } + + def main(args: Array[String]): Unit = { + val spark = SparkSession.builder() + .appName("HelloIceberg") + .master("local[*]") + .config("spark.sql.catalog.local", "org.apache.iceberg.spark.SparkCatalog") + .config("spark.sql.catalog.local.type", "hadoop") + .config("spark.sql.catalog.local.warehouse", "tmp/iceberg-warehouse") + .getOrCreate() + + val table = "local.db.hello_iceberg" + + writeIcebergTable(spark, table) + readIcebergTable(spark, table).show() + + spark.stop() + } +} diff --git a/example/scalalib/spark/5-hello-iceberg/foo/test/src/FooTests.scala b/example/scalalib/spark/5-hello-iceberg/foo/test/src/FooTests.scala new file mode 100644 index 00000000000..0291c633f31 --- /dev/null +++ b/example/scalalib/spark/5-hello-iceberg/foo/test/src/FooTests.scala @@ -0,0 +1,27 @@ +package foo + +import org.apache.spark.sql.SparkSession +import utest._ + +object FooTests extends TestSuite { + def tests = Tests { + test("Iceberg table should contain one row with 'Hello, Iceberg!'") { + val spark = SparkSession.builder() + .appName("FooTests") + .master("local[*]") + .config("spark.sql.catalog.local", "org.apache.iceberg.spark.SparkCatalog") + .config("spark.sql.catalog.local.type", "hadoop") + .config("spark.sql.catalog.local.warehouse", "tmp/test-iceberg-warehouse") + .getOrCreate() + + val table = "local.db.hello_iceberg_test" + Foo.writeIcebergTable(spark, table) + val df = Foo.readIcebergTable(spark, table) + + val messages = df.collect().map(_.getString(0)).toList + assert(messages == List("Hello, Iceberg!")) + + spark.stop() + } + } +} diff --git a/example/scalalib/spark/6-hello-mllib/build.mill b/example/scalalib/spark/6-hello-mllib/build.mill new file mode 100644 index 00000000000..0d8e004e3c1 --- /dev/null +++ b/example/scalalib/spark/6-hello-mllib/build.mill @@ -0,0 +1,37 @@ +package build +import mill._, scalalib._ + +object foo extends ScalaModule { + def scalaVersion = "2.12.15" + def mvnDeps = Seq( + mvn"org.apache.spark::spark-core:3.5.4", + mvn"org.apache.spark::spark-sql:3.5.4", + mvn"org.apache.spark::spark-mllib:3.5.4" + ) + + def forkArgs = Seq("--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED") + + object test extends ScalaTests { + def mvnDeps = Seq(mvn"com.lihaoyi::utest:0.8.5") + def 
testFramework = "utest.runner.Framework" + + def forkArgs = Seq("--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED") + } + +} +// This example demonstrates running a Spark application with MLlib using Mill. + +/** Usage + +> ./mill foo.run ++--------------+ +| words| ++--------------+ +|[hello, mllib]| ++--------------+ + +> ./mill foo.test +... ++ foo.FooTests.'tokenize' should split 'Hello MLlib' into words... +... +*/ diff --git a/example/scalalib/spark/6-hello-mllib/foo/src/foo/Foo.scala b/example/scalalib/spark/6-hello-mllib/foo/src/foo/Foo.scala new file mode 100644 index 00000000000..99af9c775ba --- /dev/null +++ b/example/scalalib/spark/6-hello-mllib/foo/src/foo/Foo.scala @@ -0,0 +1,26 @@ +package foo + +import org.apache.spark.sql.{DataFrame, SparkSession} +import org.apache.spark.ml.feature.Tokenizer + +object Foo { + + def tokenize(spark: SparkSession): DataFrame = { + import spark.implicits._ + val data = Seq("Hello MLlib").toDF("text") + val tokenizer = new Tokenizer().setInputCol("text").setOutputCol("words") + val result = tokenizer.transform(data) + result.select("words") + } + + def main(args: Array[String]): Unit = { + val spark = SparkSession.builder() + .appName("MLlibExample") + .master("local[*]") + .getOrCreate() + + tokenize(spark).show() + + spark.stop() + } +} diff --git a/example/scalalib/spark/6-hello-mllib/foo/test/src/FooTests.scala b/example/scalalib/spark/6-hello-mllib/foo/test/src/FooTests.scala new file mode 100644 index 00000000000..831f31cccc5 --- /dev/null +++ b/example/scalalib/spark/6-hello-mllib/foo/test/src/FooTests.scala @@ -0,0 +1,21 @@ +package foo + +import org.apache.spark.sql.SparkSession +import utest._ + +object FooTests extends TestSuite { + def tests = Tests { + test("'tokenize' should split 'Hello MLlib' into words") { + val spark = SparkSession.builder() + .appName("MLlibTest") + .master("local[*]") + .getOrCreate() + + val df = Foo.tokenize(spark) + val tokens = df.collect().toList.flatMap(_.getSeq(0)) + assert(tokens == List("hello", "mllib")) + + spark.stop() + } + } +} diff --git a/example/scalalib/spark/7-hello-pyspark-mllib/build.mill b/example/scalalib/spark/7-hello-pyspark-mllib/build.mill new file mode 100644 index 00000000000..f726134f733 --- /dev/null +++ b/example/scalalib/spark/7-hello-pyspark-mllib/build.mill @@ -0,0 +1,33 @@ +package build +import mill._, pythonlib._ + +object foo extends PythonModule { + + def mainScript = Task.Source { "src/foo.py" } + def pythonDeps = Seq("pyspark==3.5.4", "numpy==2.1.2", "packaging") + + object test extends PythonTests with TestModule.Unittest + +} + +// This example demonstrates running a PySpark application with MLlib using Mill + +/** Usage + +> ./mill foo.run +... ++--------------+ +|words | ++--------------+ +|[hello, mllib]| ++--------------+ + +> ./mill foo.test +... +test_tokenize... +... +Ran 1 test... +... +OK +... 
+*/ diff --git a/example/scalalib/spark/7-hello-pyspark-mllib/foo/src/foo.py b/example/scalalib/spark/7-hello-pyspark-mllib/foo/src/foo.py new file mode 100644 index 00000000000..1e681d17605 --- /dev/null +++ b/example/scalalib/spark/7-hello-pyspark-mllib/foo/src/foo.py @@ -0,0 +1,23 @@ +import monkey_patch +from pyspark.sql import SparkSession, DataFrame +from pyspark.ml.feature import Tokenizer + +def tokenize(spark: SparkSession) -> DataFrame: + data = [("Hello MLlib",)] + df = spark.createDataFrame(data, ["text"]) + tokenizer = Tokenizer(inputCol="text", outputCol="words") + result = tokenizer.transform(df) + return result.select("words") + +def main(): + spark = SparkSession.builder \ + .appName("MLlibExample") \ + .master("local[*]") \ + .getOrCreate() + + tokenize(spark).show(truncate=False) + + spark.stop() + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/example/scalalib/spark/7-hello-pyspark-mllib/foo/src/monkey_patch.py b/example/scalalib/spark/7-hello-pyspark-mllib/foo/src/monkey_patch.py new file mode 100644 index 00000000000..ed4d1e664f9 --- /dev/null +++ b/example/scalalib/spark/7-hello-pyspark-mllib/foo/src/monkey_patch.py @@ -0,0 +1,31 @@ +# monkey_patch.py +import sys + +# Patch distutils.version.LooseVersion if not available +try: + from distutils.version import LooseVersion +except ModuleNotFoundError: + from packaging.version import Version + class LooseVersion(str): + def __init__(self, v): + self._v = Version(v) + def __lt__(self, other): + return self._v < Version(other) + def __le__(self, other): + return self._v <= Version(other) + def __eq__(self, other): + return self._v == Version(other) + def __ne__(self, other): + return self._v != Version(other) + def __gt__(self, other): + return self._v > Version(other) + def __ge__(self, other): + return self._v >= Version(other) + + import types + distutils = types.ModuleType("distutils") + version = types.ModuleType("distutils.version") + version.LooseVersion = LooseVersion + distutils.version = version + sys.modules["distutils"] = distutils + sys.modules["distutils.version"] = version \ No newline at end of file diff --git a/example/scalalib/spark/7-hello-pyspark-mllib/foo/test/src/test.py b/example/scalalib/spark/7-hello-pyspark-mllib/foo/test/src/test.py new file mode 100644 index 00000000000..b6feeb4fa7d --- /dev/null +++ b/example/scalalib/spark/7-hello-pyspark-mllib/foo/test/src/test.py @@ -0,0 +1,23 @@ +import unittest +from pyspark.sql import SparkSession +from foo import tokenize + +class TokenizeTest(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.spark = SparkSession.builder \ + .appName("MLlibTest") \ + .master("local[*]") \ + .getOrCreate() + + @classmethod + def tearDownClass(cls): + cls.spark.stop() + + def test_tokenize(self): + df = tokenize(self.spark) + tokens = [row["words"] for row in df.collect()] + self.assertEqual(tokens, [["hello", "mllib"]]) + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/example/scalalib/spark/3-semi-realistic/build.mill b/example/scalalib/spark/8-semi-realistic/build.mill similarity index 100% rename from example/scalalib/spark/3-semi-realistic/build.mill rename to example/scalalib/spark/8-semi-realistic/build.mill diff --git a/example/scalalib/spark/3-semi-realistic/resources/transactions.csv b/example/scalalib/spark/8-semi-realistic/resources/transactions.csv similarity index 100% rename from example/scalalib/spark/3-semi-realistic/resources/transactions.csv rename to 
example/scalalib/spark/8-semi-realistic/resources/transactions.csv diff --git a/example/scalalib/spark/3-semi-realistic/spark-submit.sh b/example/scalalib/spark/8-semi-realistic/spark-submit.sh similarity index 100% rename from example/scalalib/spark/3-semi-realistic/spark-submit.sh rename to example/scalalib/spark/8-semi-realistic/spark-submit.sh diff --git a/example/scalalib/spark/3-semi-realistic/src/foo/Foo.scala b/example/scalalib/spark/8-semi-realistic/src/foo/Foo.scala similarity index 100% rename from example/scalalib/spark/3-semi-realistic/src/foo/Foo.scala rename to example/scalalib/spark/8-semi-realistic/src/foo/Foo.scala diff --git a/example/scalalib/spark/3-semi-realistic/test/src/FooTests.scala b/example/scalalib/spark/8-semi-realistic/test/src/FooTests.scala similarity index 100% rename from example/scalalib/spark/3-semi-realistic/test/src/FooTests.scala rename to example/scalalib/spark/8-semi-realistic/test/src/FooTests.scala diff --git a/website/docs/modules/ROOT/pages/scalalib/spark.adoc b/website/docs/modules/ROOT/pages/scalalib/spark.adoc index 0c8d30e834d..5bd2064030d 100644 --- a/website/docs/modules/ROOT/pages/scalalib/spark.adoc +++ b/website/docs/modules/ROOT/pages/scalalib/spark.adoc @@ -12,6 +12,26 @@ include::partial$example/scalalib/spark/1-hello-spark.adoc[] include::partial$example/scalalib/spark/1-hello-spark.adoc[] +== Spark Streaming + +include::partial$example/scalalib/spark/3-spark-streaming.adoc[] + +== Hello Spark + Delta + +include::partial$example/scalalib/spark/4-hello-delta.adoc[] + +== Hello Spark + Iceberg + +include::partial$example/scalalib/spark/5-hello-iceberg.adoc[] + +== Hello Spark + MLlib + +include::partial$example/scalalib/spark/6-hello-mllib.adoc[] + +== Hello PySpark + MLlib + +include::partial$example/scalalib/spark/7-hello-pyspark-mllib.adoc[] + == Semi realistic spark project with spark submit -include::partial$example/scalalib/spark/3-semi-realistic.adoc[] \ No newline at end of file +include::partial$example/scalalib/spark/8-semi-realistic.adoc[] \ No newline at end of file
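Note: the new Scala examples above (3-spark-streaming, 4-hello-delta, 5-hello-iceberg, 6-hello-mllib) repeat very similar Spark wiring: the Scala version, the spark-core/spark-sql dependencies, and the --add-opens fork flags. A possible follow-up, sketched below and not part of this diff, is a shared trait in a build.mill that the per-example modules extend; SparkModule and the version values here are hypothetical names for illustration only.

package build
import mill._, scalalib._

// Hypothetical shared base for the Spark examples (sketch only, not part of this diff).
trait SparkModule extends ScalaModule {
  def scalaVersion = "2.12.15"
  def sparkVersion: String = "3.5.4"
  // Dependencies common to all the Spark examples.
  def mvnDeps = Seq(
    mvn"org.apache.spark::spark-core:$sparkVersion",
    mvn"org.apache.spark::spark-sql:$sparkVersion"
  )
  // Spark needs reflective access to sun.nio.ch on newer JDKs.
  def forkArgs = Seq("--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED")
}

// A concrete example module then only declares what is specific to it, e.g. MLlib:
object foo extends SparkModule {
  def mvnDeps = super.mvnDeps() ++ Seq(mvn"org.apache.spark::spark-mllib:$sparkVersion")
}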
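Along the same lines, the four Kafka admin tasks in 3-spark-streaming/build.mill (createTopic, listTopics, describeTopic, readAllMessages) all spell out the same docker-compose exec kafka invocation. A small helper method inside the foo module could carry the shared part; kafkaCli below is a hypothetical name and only a sketch of how the existing os.call pattern could be factored.

// Hypothetical helper inside `object foo` of 3-spark-streaming/build.mill:
// runs a Kafka CLI tool inside the docker-compose `kafka` service.
def kafkaCli(tool: String, extraArgs: String*): Unit = {
  os.call(
    Seq("docker-compose", "exec", "kafka", tool) ++ extraArgs,
    stdout = os.Inherit
  )
}

// Each existing task then reduces to its distinguishing arguments, for example:
def listTopics: T[Unit] = Task {
  kafkaCli("kafka-topics", "--list", "--bootstrap-server", forkEnv()("KAFKA_SERVER"))
}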