diff --git a/example/package.mill b/example/package.mill index 6eb137081d2..437c68f5adc 100644 --- a/example/package.mill +++ b/example/package.mill @@ -269,7 +269,7 @@ $txt if (seenCode) "" else { val exampleDashed = examplePath.segments.mkString("-") - val download = + val download = s"{mill-download-url}/mill-dist-${build.millVersion()}-$exampleDashed.zip[download]" val browse = s"{mill-example-url}/$examplePath[browse]" s".build.mill ($download, $browse)" diff --git a/example/scalalib/spark/2-hello-pyspark/build.mill b/example/scalalib/spark/2-hello-pyspark/build.mill index c0a189f142d..5dfde6d9ae8 100644 --- a/example/scalalib/spark/2-hello-pyspark/build.mill +++ b/example/scalalib/spark/2-hello-pyspark/build.mill @@ -10,6 +10,8 @@ object foo extends PythonModule { } +// This example demonstrates running PySpark using Mill. + /** Usage > ./mill foo.run diff --git a/example/scalalib/spark/3-spark-streaming/build.mill b/example/scalalib/spark/3-spark-streaming/build.mill new file mode 100644 index 00000000000..9bd8574de81 --- /dev/null +++ b/example/scalalib/spark/3-spark-streaming/build.mill @@ -0,0 +1,132 @@ +package build +import mill._, scalalib._ + +object foo extends ScalaModule { + def scalaVersion = "2.12.15" + def mvnDeps = Seq( + mvn"org.apache.spark::spark-core:3.5.4", + mvn"org.apache.spark::spark-sql:3.5.4", + mvn"org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.4" + ) + + def forkArgs = Seq("--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED") + + def forkEnv: T[Map[String, String]] = Map("KAFKA_SERVER" -> "localhost:9092") + + def createTopic(args: mill.define.Args) = Task.Command { + val topic = args.value.mkString(" ") + val server = forkEnv()("KAFKA_SERVER") + os.call( + Seq( + "docker-compose", + "exec", + "kafka", + "kafka-topics", + "--create", + "--topic", + topic, + "--bootstrap-server", + server + ), + stdout = os.Inherit + ) + } + + def listTopics: T[Unit] = Task { + val server = forkEnv()("KAFKA_SERVER") + os.call( + Seq( + "docker-compose", + "exec", + "kafka", + "kafka-topics", + "--list", + "--bootstrap-server", + server + ), + stdout = os.Inherit + ) + } + + def describeTopic(args: mill.define.Args) = Task.Command { + val topic = args.value.mkString(" ") + val server = forkEnv()("KAFKA_SERVER") + os.call( + Seq( + "docker-compose", + "exec", + "kafka", + "kafka-topics", + "--describe", + "--topic", + topic, + "--bootstrap-server", + server + ), + stdout = os.Inherit + ) + } + + def readAllMessages(args: mill.define.Args) = Task.Command { + val topic = args.value.mkString(" ") + val server = forkEnv()("KAFKA_SERVER") + os.call( + Seq( + "docker-compose", + "exec", + "kafka", + "kafka-console-consumer", + "--bootstrap-server", + server, + "--topic", + topic, + "--from-beginning", + "--timeout-ms", + "1000" + ), + stdout = os.Inherit + ) + } + +} + +object producer extends ScalaModule { + def scalaVersion = "2.12.15" + def mvnDeps = Seq( + mvn"org.apache.kafka:kafka-clients:3.7.0" + ) +} + +// This example demonstrates how to run a Spark Streaming application using Mill. +// It showcases how Mill can be used to manage a data source like Kafka by defining +// custom Mill tasks to execute administrative Kafka commands. This allows for intuitive, +// scriptable management of Kafka directly through Mill. + +/** Usage + +> docker-compose up -d + +> ./mill foo.createTopic "test-topic" # Custom mill task to create topic. +... +Created topic test-topic. + +> ./mill foo.listTopics # Custom mill task to list all topics. +... 
+test-topic + +> ./mill foo.describeTopic "test-topic" # Custom mill task to describe a topic. +... +Topic: test-topic... + +> ./mill producer.run # Send message + +> ./mill foo.run # Consume message with spark. +... ++-------------+ +| message| ++-------------+ +|Hello, World!| ++-------------+ + +> docker-compose down -v --remove-orphans +*/ diff --git a/example/scalalib/spark/3-spark-streaming/docker-compose.yml b/example/scalalib/spark/3-spark-streaming/docker-compose.yml new file mode 100644 index 00000000000..7739db11b20 --- /dev/null +++ b/example/scalalib/spark/3-spark-streaming/docker-compose.yml @@ -0,0 +1,22 @@ +version: '3.8' + +services: + zookeeper: + image: confluentinc/cp-zookeeper:7.5.0 + ports: + - "2181:2181" + environment: + ZOOKEEPER_CLIENT_PORT: 2181 + ZOOKEEPER_TICK_TIME: 2000 + + kafka: + image: confluentinc/cp-kafka:7.5.0 + ports: + - "9092:9092" + environment: + KAFKA_BROKER_ID: 1 + KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181 + KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://localhost:9092 + KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 + depends_on: + - zookeeper \ No newline at end of file diff --git a/example/scalalib/spark/3-spark-streaming/foo/src/foo/Foo.scala b/example/scalalib/spark/3-spark-streaming/foo/src/foo/Foo.scala new file mode 100644 index 00000000000..c7b772b16a5 --- /dev/null +++ b/example/scalalib/spark/3-spark-streaming/foo/src/foo/Foo.scala @@ -0,0 +1,33 @@ +package foo + +import org.apache.spark.sql.{DataFrame, SparkSession} +import org.apache.spark.sql.functions._ + +object Foo { + + def streamFromKafka(spark: SparkSession): DataFrame = { + spark.readStream + .format("kafka") + .option("kafka.bootstrap.servers", sys.env("KAFKA_SERVER")) + .option("subscribe", "test-topic") + .option("startingOffsets", "earliest") + .load() + .selectExpr("CAST(value AS STRING)").toDF("message") + } + + def main(args: Array[String]): Unit = { + val spark = SparkSession.builder() + .appName("HelloWorldKafka") + .master("local[*]") + .getOrCreate() + + val df = streamFromKafka(spark) + + val query = df.writeStream + .format("console") + .outputMode("append") + .start() + + query.awaitTermination(9000) + } +} diff --git a/example/scalalib/spark/3-spark-streaming/producer/src/producer/Producer.scala b/example/scalalib/spark/3-spark-streaming/producer/src/producer/Producer.scala new file mode 100644 index 00000000000..e995e222bd9 --- /dev/null +++ b/example/scalalib/spark/3-spark-streaming/producer/src/producer/Producer.scala @@ -0,0 +1,13 @@ +import org.apache.kafka.clients.producer._ + +object Producer { + def main(args: Array[String]): Unit = { + val props = new java.util.Properties() + props.put("bootstrap.servers", "localhost:9092") + props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer") + props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer") + val producer = new KafkaProducer[String, String](props) + producer.send(new ProducerRecord[String, String]("test-topic", "Hello, World!")) + producer.close() + } +} diff --git a/example/scalalib/spark/4-hello-delta/build.mill b/example/scalalib/spark/4-hello-delta/build.mill new file mode 100644 index 00000000000..8cbe4574c52 --- /dev/null +++ b/example/scalalib/spark/4-hello-delta/build.mill @@ -0,0 +1,38 @@ +package build +import mill._, scalalib._ + +object foo extends ScalaModule { + def scalaVersion = "2.12.15" + def mvnDeps = Seq( + mvn"org.apache.spark::spark-core:3.4.0", + mvn"org.apache.spark::spark-sql:3.4.0", + mvn"io.delta:delta-core_2.12:2.4.0" + ) + + def 
forkArgs = Seq("--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED") + + object test extends ScalaTests { + def mvnDeps = Seq(mvn"com.lihaoyi::utest:0.8.5") + def testFramework = "utest.runner.Framework" + + def forkArgs = Seq("--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED") + } + +} + +// This example demonstrates running a Spark application with Delta Lake as the storage layer, using Mill. + +/** Usage + +> ./mill foo.run ++-------------+ +| message| ++-------------+ +|Hello, Delta!| ++-------------+ + +> ./mill foo.test +... ++ foo.FooTests.Delta table should contain one row with 'Hello, Delta!'... +... +*/ diff --git a/example/scalalib/spark/4-hello-delta/foo/src/foo/Foo.scala b/example/scalalib/spark/4-hello-delta/foo/src/foo/Foo.scala new file mode 100644 index 00000000000..7b4b05cbacd --- /dev/null +++ b/example/scalalib/spark/4-hello-delta/foo/src/foo/Foo.scala @@ -0,0 +1,33 @@ +package foo + +import org.apache.spark.sql.{DataFrame, SparkSession} +import io.delta.tables._ + +object Foo { + + def writeDeltaTable(spark: SparkSession, path: String): Unit = { + import spark.implicits._ + val data = Seq("Hello, Delta!").toDF("message") + data.write.format("delta").mode("overwrite").save(path) + } + + def readDeltaTable(spark: SparkSession, path: String): DataFrame = { + spark.read.format("delta").load(path) + } + + def main(args: Array[String]): Unit = { + val spark = SparkSession.builder() + .appName("HelloDelta") + .master("local[*]") + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") + .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") + .getOrCreate() + + val deltaPath = "tmp/delta-table" + + writeDeltaTable(spark, deltaPath) + readDeltaTable(spark, deltaPath).show() + + spark.stop() + } +} diff --git a/example/scalalib/spark/4-hello-delta/foo/test/src/FooTests.scala b/example/scalalib/spark/4-hello-delta/foo/test/src/FooTests.scala new file mode 100644 index 00000000000..b9b76fafb36 --- /dev/null +++ b/example/scalalib/spark/4-hello-delta/foo/test/src/FooTests.scala @@ -0,0 +1,29 @@ +package foo + +import org.apache.spark.sql.SparkSession +import utest._ + +object FooTests extends TestSuite { + def tests = Tests { + test("Delta table should contain one row with 'Hello, Delta!'") { + val spark = SparkSession.builder() + .appName("FooDeltaTest") + .master("local[*]") + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") + .config( + "spark.sql.catalog.spark_catalog", + "org.apache.spark.sql.delta.catalog.DeltaCatalog" + ) + .getOrCreate() + + val path = "tmp/test-delta-table" + Foo.writeDeltaTable(spark, path) + val df = Foo.readDeltaTable(spark, path) + + val messages = df.collect().map(_.getString(0)).toList + assert(messages == List("Hello, Delta!")) + + spark.stop() + } + } +} diff --git a/example/scalalib/spark/5-hello-iceberg/build.mill b/example/scalalib/spark/5-hello-iceberg/build.mill new file mode 100644 index 00000000000..a430085d439 --- /dev/null +++ b/example/scalalib/spark/5-hello-iceberg/build.mill @@ -0,0 +1,44 @@ +package build +import mill._, scalalib._ + +object foo extends ScalaModule { + def scalaVersion = "2.12.15" + def mvnDeps = Seq( + mvn"org.apache.spark::spark-core:3.5.4", + mvn"org.apache.spark::spark-sql:3.5.4", + mvn"org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.4.3" + ) + + def forkArgs = Seq( + "--add-opens=java.base/java.nio=ALL-UNNAMED", + "--add-exports=java.base/sun.nio.ch=ALL-UNNAMED" + ) + + object test extends ScalaTests { + def mvnDeps = 
Seq(mvn"com.lihaoyi::utest:0.8.5") + def testFramework = "utest.runner.Framework" + + def forkArgs = Seq( + "--add-opens=java.base/java.nio=ALL-UNNAMED", + "--add-exports=java.base/sun.nio.ch=ALL-UNNAMED" + ) + } + +} + +// This example demonstrates running a Spark application with Iceberg as the storage layer, using Mill. + +/** Usage + +> ./mill foo.run ++---------------+ +| message| ++---------------+ +|Hello, Iceberg!| ++---------------+ + +> ./mill foo.test +... ++ foo.FooTests.Iceberg table should contain one row with 'Hello, Iceberg!'... +... +*/ diff --git a/example/scalalib/spark/5-hello-iceberg/foo/src/foo/Foo.scala b/example/scalalib/spark/5-hello-iceberg/foo/src/foo/Foo.scala new file mode 100644 index 00000000000..1d705d13328 --- /dev/null +++ b/example/scalalib/spark/5-hello-iceberg/foo/src/foo/Foo.scala @@ -0,0 +1,34 @@ +package foo + +import org.apache.spark.sql.{DataFrame, SparkSession} +import java.nio.file.{Files, Paths} + +object Foo { + + def writeIcebergTable(spark: SparkSession, tableName: String): Unit = { + import spark.implicits._ + val data = Seq("Hello, Iceberg!").toDF("message") + data.writeTo(tableName).using("iceberg").createOrReplace() + } + + def readIcebergTable(spark: SparkSession, tableName: String): DataFrame = { + spark.read.table(tableName) + } + + def main(args: Array[String]): Unit = { + val spark = SparkSession.builder() + .appName("HelloIceberg") + .master("local[*]") + .config("spark.sql.catalog.local", "org.apache.iceberg.spark.SparkCatalog") + .config("spark.sql.catalog.local.type", "hadoop") + .config("spark.sql.catalog.local.warehouse", "tmp/iceberg-warehouse") + .getOrCreate() + + val table = "local.db.hello_iceberg" + + writeIcebergTable(spark, table) + readIcebergTable(spark, table).show() + + spark.stop() + } +} diff --git a/example/scalalib/spark/5-hello-iceberg/foo/test/src/FooTests.scala b/example/scalalib/spark/5-hello-iceberg/foo/test/src/FooTests.scala new file mode 100644 index 00000000000..0291c633f31 --- /dev/null +++ b/example/scalalib/spark/5-hello-iceberg/foo/test/src/FooTests.scala @@ -0,0 +1,27 @@ +package foo + +import org.apache.spark.sql.SparkSession +import utest._ + +object FooTests extends TestSuite { + def tests = Tests { + test("Iceberg table should contain one row with 'Hello, Iceberg!'") { + val spark = SparkSession.builder() + .appName("FooTests") + .master("local[*]") + .config("spark.sql.catalog.local", "org.apache.iceberg.spark.SparkCatalog") + .config("spark.sql.catalog.local.type", "hadoop") + .config("spark.sql.catalog.local.warehouse", "tmp/test-iceberg-warehouse") + .getOrCreate() + + val table = "local.db.hello_iceberg_test" + Foo.writeIcebergTable(spark, table) + val df = Foo.readIcebergTable(spark, table) + + val messages = df.collect().map(_.getString(0)).toList + assert(messages == List("Hello, Iceberg!")) + + spark.stop() + } + } +} diff --git a/example/scalalib/spark/6-hello-mllib/build.mill b/example/scalalib/spark/6-hello-mllib/build.mill new file mode 100644 index 00000000000..0d8e004e3c1 --- /dev/null +++ b/example/scalalib/spark/6-hello-mllib/build.mill @@ -0,0 +1,37 @@ +package build +import mill._, scalalib._ + +object foo extends ScalaModule { + def scalaVersion = "2.12.15" + def mvnDeps = Seq( + mvn"org.apache.spark::spark-core:3.5.4", + mvn"org.apache.spark::spark-sql:3.5.4", + mvn"org.apache.spark::spark-mllib:3.5.4" + ) + + def forkArgs = Seq("--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED") + + object test extends ScalaTests { + def mvnDeps = Seq(mvn"com.lihaoyi::utest:0.8.5") + def 
testFramework = "utest.runner.Framework" + + def forkArgs = Seq("--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED") + } + +} +// This example demonstrates running a Spark application with MLlib using Mill. + +/** Usage + +> ./mill foo.run ++--------------+ +| words| ++--------------+ +|[hello, mllib]| ++--------------+ + +> ./mill foo.test +... ++ foo.FooTests.'tokenize' should split 'Hello MLlib' into words... +... +*/ diff --git a/example/scalalib/spark/6-hello-mllib/foo/src/foo/Foo.scala b/example/scalalib/spark/6-hello-mllib/foo/src/foo/Foo.scala new file mode 100644 index 00000000000..99af9c775ba --- /dev/null +++ b/example/scalalib/spark/6-hello-mllib/foo/src/foo/Foo.scala @@ -0,0 +1,26 @@ +package foo + +import org.apache.spark.sql.{DataFrame, SparkSession} +import org.apache.spark.ml.feature.Tokenizer + +object Foo { + + def tokenize(spark: SparkSession): DataFrame = { + import spark.implicits._ + val data = Seq("Hello MLlib").toDF("text") + val tokenizer = new Tokenizer().setInputCol("text").setOutputCol("words") + val result = tokenizer.transform(data) + result.select("words") + } + + def main(args: Array[String]): Unit = { + val spark = SparkSession.builder() + .appName("MLlibExample") + .master("local[*]") + .getOrCreate() + + tokenize(spark).show() + + spark.stop() + } +} diff --git a/example/scalalib/spark/6-hello-mllib/foo/test/src/FooTests.scala b/example/scalalib/spark/6-hello-mllib/foo/test/src/FooTests.scala new file mode 100644 index 00000000000..831f31cccc5 --- /dev/null +++ b/example/scalalib/spark/6-hello-mllib/foo/test/src/FooTests.scala @@ -0,0 +1,21 @@ +package foo + +import org.apache.spark.sql.SparkSession +import utest._ + +object FooTests extends TestSuite { + def tests = Tests { + test("'tokenize' should split 'Hello MLlib' into words") { + val spark = SparkSession.builder() + .appName("MLlibTest") + .master("local[*]") + .getOrCreate() + + val df = Foo.tokenize(spark) + val tokens = df.collect().toList.flatMap(_.getSeq(0)) + assert(tokens == List("hello", "mllib")) + + spark.stop() + } + } +} diff --git a/example/scalalib/spark/7-hello-pyspark-mllib/build.mill b/example/scalalib/spark/7-hello-pyspark-mllib/build.mill new file mode 100644 index 00000000000..f726134f733 --- /dev/null +++ b/example/scalalib/spark/7-hello-pyspark-mllib/build.mill @@ -0,0 +1,33 @@ +package build +import mill._, pythonlib._ + +object foo extends PythonModule { + + def mainScript = Task.Source { "src/foo.py" } + def pythonDeps = Seq("pyspark==3.5.4", "numpy==2.1.2", "packaging") + + object test extends PythonTests with TestModule.Unittest + +} + +// This example demonstrates running a PySpark application with MLlib using Mill + +/** Usage + +> ./mill foo.run +... ++--------------+ +|words | ++--------------+ +|[hello, mllib]| ++--------------+ + +> ./mill foo.test +... +test_tokenize... +... +Ran 1 test... +... +OK +... 
+*/ diff --git a/example/scalalib/spark/7-hello-pyspark-mllib/foo/src/foo.py b/example/scalalib/spark/7-hello-pyspark-mllib/foo/src/foo.py new file mode 100644 index 00000000000..1e681d17605 --- /dev/null +++ b/example/scalalib/spark/7-hello-pyspark-mllib/foo/src/foo.py @@ -0,0 +1,23 @@ +import monkey_patch +from pyspark.sql import SparkSession, DataFrame +from pyspark.ml.feature import Tokenizer + +def tokenize(spark: SparkSession) -> DataFrame: + data = [("Hello MLlib",)] + df = spark.createDataFrame(data, ["text"]) + tokenizer = Tokenizer(inputCol="text", outputCol="words") + result = tokenizer.transform(df) + return result.select("words") + +def main(): + spark = SparkSession.builder \ + .appName("MLlibExample") \ + .master("local[*]") \ + .getOrCreate() + + tokenize(spark).show(truncate=False) + + spark.stop() + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/example/scalalib/spark/7-hello-pyspark-mllib/foo/src/monkey_patch.py b/example/scalalib/spark/7-hello-pyspark-mllib/foo/src/monkey_patch.py new file mode 100644 index 00000000000..ed4d1e664f9 --- /dev/null +++ b/example/scalalib/spark/7-hello-pyspark-mllib/foo/src/monkey_patch.py @@ -0,0 +1,31 @@ +# monkey_patch.py +import sys + +# Patch distutils.version.LooseVersion if not available +try: + from distutils.version import LooseVersion +except ModuleNotFoundError: + from packaging.version import Version + class LooseVersion(str): + def __init__(self, v): + self._v = Version(v) + def __lt__(self, other): + return self._v < Version(other) + def __le__(self, other): + return self._v <= Version(other) + def __eq__(self, other): + return self._v == Version(other) + def __ne__(self, other): + return self._v != Version(other) + def __gt__(self, other): + return self._v > Version(other) + def __ge__(self, other): + return self._v >= Version(other) + + import types + distutils = types.ModuleType("distutils") + version = types.ModuleType("distutils.version") + version.LooseVersion = LooseVersion + distutils.version = version + sys.modules["distutils"] = distutils + sys.modules["distutils.version"] = version \ No newline at end of file diff --git a/example/scalalib/spark/7-hello-pyspark-mllib/foo/test/src/test.py b/example/scalalib/spark/7-hello-pyspark-mllib/foo/test/src/test.py new file mode 100644 index 00000000000..b6feeb4fa7d --- /dev/null +++ b/example/scalalib/spark/7-hello-pyspark-mllib/foo/test/src/test.py @@ -0,0 +1,23 @@ +import unittest +from pyspark.sql import SparkSession +from foo import tokenize + +class TokenizeTest(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.spark = SparkSession.builder \ + .appName("MLlibTest") \ + .master("local[*]") \ + .getOrCreate() + + @classmethod + def tearDownClass(cls): + cls.spark.stop() + + def test_tokenize(self): + df = tokenize(self.spark) + tokens = [row["words"] for row in df.collect()] + self.assertEqual(tokens, [["hello", "mllib"]]) + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/example/scalalib/spark/3-semi-realistic/build.mill b/example/scalalib/spark/8-semi-realistic/build.mill similarity index 100% rename from example/scalalib/spark/3-semi-realistic/build.mill rename to example/scalalib/spark/8-semi-realistic/build.mill diff --git a/example/scalalib/spark/3-semi-realistic/resources/transactions.csv b/example/scalalib/spark/8-semi-realistic/resources/transactions.csv similarity index 100% rename from example/scalalib/spark/3-semi-realistic/resources/transactions.csv rename to 
example/scalalib/spark/8-semi-realistic/resources/transactions.csv diff --git a/example/scalalib/spark/3-semi-realistic/spark-submit.sh b/example/scalalib/spark/8-semi-realistic/spark-submit.sh similarity index 100% rename from example/scalalib/spark/3-semi-realistic/spark-submit.sh rename to example/scalalib/spark/8-semi-realistic/spark-submit.sh diff --git a/example/scalalib/spark/3-semi-realistic/src/foo/Foo.scala b/example/scalalib/spark/8-semi-realistic/src/foo/Foo.scala similarity index 100% rename from example/scalalib/spark/3-semi-realistic/src/foo/Foo.scala rename to example/scalalib/spark/8-semi-realistic/src/foo/Foo.scala diff --git a/example/scalalib/spark/3-semi-realistic/test/src/FooTests.scala b/example/scalalib/spark/8-semi-realistic/test/src/FooTests.scala similarity index 100% rename from example/scalalib/spark/3-semi-realistic/test/src/FooTests.scala rename to example/scalalib/spark/8-semi-realistic/test/src/FooTests.scala diff --git a/website/docs/modules/ROOT/pages/scalalib/spark.adoc b/website/docs/modules/ROOT/pages/scalalib/spark.adoc index 0c8d30e834d..5bd2064030d 100644 --- a/website/docs/modules/ROOT/pages/scalalib/spark.adoc +++ b/website/docs/modules/ROOT/pages/scalalib/spark.adoc @@ -12,6 +12,26 @@ include::partial$example/scalalib/spark/1-hello-spark.adoc[] include::partial$example/scalalib/spark/1-hello-spark.adoc[] +== Spark Streaming + +include::partial$example/scalalib/spark/3-spark-streaming.adoc[] + +== Hello Spark + Delta + +include::partial$example/scalalib/spark/4-hello-delta.adoc[] + +== Hello Spark + Iceberg + +include::partial$example/scalalib/spark/5-hello-iceberg.adoc[] + +== Hello Spark + MLlib + +include::partial$example/scalalib/spark/6-hello-mllib.adoc[] + +== Hello PySpark + MLlib + +include::partial$example/scalalib/spark/7-hello-pyspark-mllib.adoc[] + == Semi realistic spark project with spark submit -include::partial$example/scalalib/spark/3-semi-realistic.adoc[] \ No newline at end of file +include::partial$example/scalalib/spark/8-semi-realistic.adoc[] \ No newline at end of file
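Note: the new Scala examples above (3-spark-streaming, 4-hello-delta, 5-hello-iceberg, 6-hello-mllib) repeat very similar Spark wiring: the Scala version, the spark-core/spark-sql dependencies, and the --add-opens fork flags. A possible follow-up, sketched below and not part of this diff, is a shared trait in a build.mill that the per-example modules extend; SparkModule and the version values here are hypothetical names for illustration only.

package build
import mill._, scalalib._

// Hypothetical shared base for the Spark examples (sketch only, not part of this diff).
trait SparkModule extends ScalaModule {
  def scalaVersion = "2.12.15"
  def sparkVersion: String = "3.5.4"
  // Dependencies common to all the Spark examples.
  def mvnDeps = Seq(
    mvn"org.apache.spark::spark-core:$sparkVersion",
    mvn"org.apache.spark::spark-sql:$sparkVersion"
  )
  // Spark needs reflective access to sun.nio.ch on newer JDKs.
  def forkArgs = Seq("--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED")
}

// A concrete example module then only declares what is specific to it, e.g. MLlib:
object foo extends SparkModule {
  def mvnDeps = super.mvnDeps() ++ Seq(mvn"org.apache.spark::spark-mllib:$sparkVersion")
}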
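Along the same lines, the four Kafka admin tasks in 3-spark-streaming/build.mill (createTopic, listTopics, describeTopic, readAllMessages) all spell out the same docker-compose exec kafka invocation. A small helper method inside the foo module could carry the shared part; kafkaCli below is a hypothetical name and only a sketch of how the existing os.call pattern could be factored.

// Hypothetical helper inside `object foo` of 3-spark-streaming/build.mill:
// runs a Kafka CLI tool inside the docker-compose `kafka` service.
def kafkaCli(tool: String, extraArgs: String*): Unit = {
  os.call(
    Seq("docker-compose", "exec", "kafka", tool) ++ extraArgs,
    stdout = os.Inherit
  )
}

// Each existing task then reduces to its distinguishing arguments, for example:
def listTopics: T[Unit] = Task {
  kafkaCli("kafka-topics", "--list", "--bootstrap-server", forkEnv()("KAFKA_SERVER"))
}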