diff --git a/benchmark/build.sbt b/benchmark/build.sbt
new file mode 100644
index 00000000..4b81ae1a
--- /dev/null
+++ b/benchmark/build.sbt
@@ -0,0 +1,29 @@
+libraryDependencies ++= {
+  import Ordering.Implicits._
+  // The deeplearning4j/nd4s 0.8.0 artifacts below are only published for Scala 2.11,
+  // so the benchmark dependencies are skipped on 2.12+.
+  if (VersionNumber(scalaVersion.value).numbers >= Seq(2, 12)) {
+    Nil
+  } else {
+    Seq(
+      "com.thoughtworks.deeplearning.etl" %% "cifar100" % "0.2.0",
+      "ch.qos.logback" % "logback-classic" % "1.2.3" % Optional,
+      "org.deeplearning4j" % "deeplearning4j-core" % "0.8.0",
+      "org.nd4j" %% "nd4s" % "0.8.0",
+      "org.nd4j" % "nd4j-api" % "0.8.0",
+      "org.nd4j" % "nd4j-native-platform" % "0.8.0" % Optional
+    )
+  }
+}
+
+fork in Test := true
+
+enablePlugins(JmhPlugin)
+
+publishArtifact := false
+
+addCompilerPlugin("com.thoughtworks.dsl" %% "compilerplugins-bangnotation" % "1.0.0-RC10")
+
+addCompilerPlugin("com.thoughtworks.dsl" %% "compilerplugins-reseteverywhere" % "1.0.0-RC10")
+
+libraryDependencies += "com.thoughtworks.dsl" %% "domains-scalaz" % "1.0.0-RC10"
+
+addCompilerPlugin("com.thoughtworks.import" %% "import" % "2.0.2")
diff --git a/benchmark/src/jmh/scala/com/thoughtworks/deeplearning/benchmark/benchmark.scala b/benchmark/src/jmh/scala/com/thoughtworks/deeplearning/benchmark/benchmark.scala
new file mode 100644
index 00000000..90194f3b
--- /dev/null
+++ b/benchmark/src/jmh/scala/com/thoughtworks/deeplearning/benchmark/benchmark.scala
@@ -0,0 +1,348 @@
+package com.thoughtworks.deeplearning.benchmark
+
+import java.util.concurrent.{ExecutorService, Executors}
+
+import com.thoughtworks.deeplearning.DeepLearning
+import com.thoughtworks.deeplearning.etl.Cifar100
+import com.thoughtworks.deeplearning.etl.Cifar100.Batch
+import com.thoughtworks.deeplearning.plugins.Builtins
+import com.thoughtworks.feature.Factory
+import org.openjdk.jmh.annotations._
+import com.thoughtworks.future._
+import org.deeplearning4j.nn.conf.ComputationGraphConfiguration
+import org.deeplearning4j.nn.conf.NeuralNetConfiguration
+import org.deeplearning4j.nn.conf.layers.{ActivationLayer, DenseLayer, LossLayer, OutputLayer}
+import org.deeplearning4j.nn.conf.Updater
+import org.deeplearning4j.nn.conf.graph.{ElementWiseVertex, MergeVertex, StackVertex}
+import org.deeplearning4j.nn.graph.ComputationGraph
+import org.deeplearning4j.nn.multilayer.MultiLayerNetwork
+import org.nd4j.linalg.activations.Activation
+import org.nd4j.linalg.api.ndarray.INDArray
+import org.nd4j.linalg.dataset.{DataSet, MultiDataSet}
+import org.nd4j.linalg.factory.Nd4j
+import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction
+import org.nd4j.linalg.ops.transforms.Transforms
+
+import scala.concurrent.{ExecutionContext, ExecutionContextExecutorService}
+
+/**
+ * @author 杨博 (Yang Bo)
+ */
+object benchmark {
+
+  import $exec.`https://gist.github.com/Atry/1fb0608c655e3233e68b27ba99515f16/raw/39ba06ee597839d618f2fcfe9526744c60f2f70a/FixedLearningRate.sc`
+
+  trait LayerOutput {
+    def numberOfFeatures: Int
+    type Output
+    def output: Output
+    def typeClassInstance: DeepLearning.Aux[Output, INDArray, INDArray]
+  }
+  object LayerOutput {
+    def input(indArray: INDArray): LayerOutput = new LayerOutput {
+      def numberOfFeatures: Int = indArray.shape().apply(1)
+
+      type Output = INDArray
+      def output = indArray
+
+      def typeClassInstance: DeepLearning.Aux[INDArray, INDArray, INDArray] = ???
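+      // Deliberately left as a `???` stub: nothing in this benchmark file ever
+      // dereferences `typeClassInstance`, so the stub is never evaluated.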
+    }
+  }
+
+  @Threads(value = 1)
+  @State(Scope.Benchmark)
+  class BranchNetBenchmark {
+
+    private def deeplearning4jConf = {
+
+      val builder = new NeuralNetConfiguration.Builder()
+        .updater(Updater.SGD)
+        .learningRate(1.0)
+        .graphBuilder
+        .addInputs("input")
+
+      for (i <- 0 until numberOfBranches) {
+        builder
+          .addLayer(
+            s"coarse${i}_dense0",
+            new DenseLayer.Builder()
+              .activation(Activation.RELU)
+              .nIn(Cifar100.NumberOfPixelsPerSample)
+              .nOut(numberOfHiddenFeatures)
+              .build,
+            "input"
+          )
+          .addLayer(
+            s"coarse${i}_dense1",
+            new DenseLayer.Builder()
+              .activation(Activation.RELU)
+              .nIn(numberOfHiddenFeatures)
+              .nOut(numberOfHiddenFeatures)
+              .build,
+            s"coarse${i}_dense0"
+          )
+      }
+
+      builder
+        .addVertex("fusion",
+                   new ElementWiseVertex(ElementWiseVertex.Op.Add),
+                   (for (i <- 0 until numberOfBranches) yield s"coarse${i}_dense1"): _*)
+        .addLayer(
+          "coarse_probabilities",
+          new DenseLayer.Builder()
+            .activation(Activation.SOFTMAX)
+            .nIn(numberOfHiddenFeatures)
+            .nOut(Cifar100.NumberOfCoarseClasses)
+            .build,
+          "fusion"
+        )
+        .addLayer("coarse_loss", new LossLayer.Builder(LossFunction.MCXENT).build(), "coarse_probabilities")
+
+      for (i <- 0 until Cifar100.NumberOfCoarseClasses) {
+        builder
+          .addLayer(
+            s"fine${i}_dense0",
+            new DenseLayer.Builder()
+              .activation(Activation.RELU)
+              .nIn(numberOfHiddenFeatures)
+              .nOut(numberOfHiddenFeatures)
+              .build,
+            "fusion"
+          )
+          .addLayer(
+            s"fine${i}_dense1",
+            new DenseLayer.Builder()
+              .activation(Activation.RELU)
+              .nIn(numberOfHiddenFeatures)
+              .nOut(numberOfHiddenFeatures)
+              .build,
+            s"fine${i}_dense0"
+          )
+          .addLayer(
+            s"fine${i}_scores",
+            new DenseLayer.Builder()
+              .activation(Activation.IDENTITY)
+              .nIn(numberOfHiddenFeatures)
+              .nOut(Cifar100.NumberOfFineClassesPerCoarseClass)
+              .build,
+            s"fine${i}_dense1"
+          )
+      }
+
+      builder
+        .addVertex("fine_stack", new StackVertex(), (for (i <- 0 until Cifar100.NumberOfCoarseClasses) yield s"fine${i}_scores"): _*)
+//      .addLayer("fine_probabilities",
+//                new ActivationLayer.Builder().activation(Activation.SOFTMAX).build(),
+//                "fine_stack")
+        .addLayer("fine_loss",
+                  new LossLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX).build(),
+                  "fine_stack")
+        .setOutputs("coarse_loss", "fine_loss")
+        .build
+    }
+
+    private var computationGraph: ComputationGraph = _
+
+    @Param(Array("8", "16"))
+    protected var batchSize: Int = _
+
+    @Param(Array("1", "2", "4"))
+    protected var sizeOfThreadPool: Int = _
+
+    @Param(Array("16", "32", "64"))
+    protected var numberOfHiddenFeatures: Int = _
+
+    @Param(Array("16", "8", "4", "2", "1"))
+    protected var numberOfBranches: Int = _
+
+    private implicit var executionContext: ExecutionContextExecutorService = _
+
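+    // Loads CIFAR-100 once on first use, then repeats epochs of
+    // (coarseClass, batch) pairs indefinitely, so `batches.next()` never
+    // runs out of data.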
+    private lazy val batches = {
+      val cifar100: Cifar100 = Cifar100.load().blockingAwait
+      Iterator.continually(cifar100.epochByCoarseClass(batchSize)).flatten
+    }
+
+    class Model {
+      val hyperparameters = Factory[Builtins with FixedLearningRate].newInstance(learningRate = 0.0001)
+
+      import hyperparameters._, implicits._
+
+      object CoarseFeatures extends (INDArray => INDArrayLayer) {
+
+        val branches = Seq.fill(numberOfBranches)(new (INDArray => INDArrayLayer) {
+          object Dense1 extends (INDArray => INDArrayLayer) {
+            val weight = INDArrayWeight(Nd4j.randn(Cifar100.NumberOfPixelsPerSample, numberOfHiddenFeatures))
+            val bias = INDArrayWeight(Nd4j.randn(1, numberOfHiddenFeatures))
+
+            def apply(input: INDArray) = {
+              max(input dot weight + bias, 0.0)
+            }
+          }
+
+          val weight = INDArrayWeight(Nd4j.randn(numberOfHiddenFeatures, numberOfHiddenFeatures))
+          val bias = INDArrayWeight(Nd4j.randn(1, numberOfHiddenFeatures))
+
+          def apply(input: INDArray) = {
+            max(Dense1(input) dot weight + bias, 0.0)
+          }
+        })
+
+        def apply(input: INDArray) = {
+          branches.map(_.apply(input)).reduce(_ + _)
+        }
+      }
+
+      object CoarseProbabilityModel {
+        val weight = INDArrayWeight(Nd4j.randn(numberOfHiddenFeatures, Cifar100.NumberOfCoarseClasses))
+        val bias = INDArrayWeight(Nd4j.randn(1, Cifar100.NumberOfCoarseClasses))
+
+        def apply(input: INDArrayLayer) = {
+          val scores = input dot weight + bias
+
+          val expScores = exp(scores)
+          expScores / expScores.sum(1)
+        }
+      }
+
+      val fineScoreModels = Seq.fill(Cifar100.NumberOfCoarseClasses)(new (INDArrayLayer => INDArrayLayer) {
+        object Dense2 extends (INDArrayLayer => INDArrayLayer) {
+
+          object Dense1 extends (INDArrayLayer => INDArrayLayer) {
+            val weight = INDArrayWeight(Nd4j.randn(numberOfHiddenFeatures, numberOfHiddenFeatures))
+            val bias = INDArrayWeight(Nd4j.randn(1, numberOfHiddenFeatures))
+
+            def apply(coarseFeatures: INDArrayLayer) = {
+              max(coarseFeatures dot weight + bias, 0.0)
+            }
+          }
+
+          val weight = INDArrayWeight(Nd4j.randn(numberOfHiddenFeatures, numberOfHiddenFeatures))
+          val bias = INDArrayWeight(Nd4j.randn(1, numberOfHiddenFeatures))
+
+          def apply(coarseFeatures: INDArrayLayer) = {
+            max(Dense1(coarseFeatures) dot weight + bias, 0.0)
+          }
+        }
+
+        val weight = INDArrayWeight(Nd4j.randn(numberOfHiddenFeatures, Cifar100.NumberOfFineClassesPerCoarseClass))
+        val bias = INDArrayWeight(Nd4j.randn(1, Cifar100.NumberOfFineClassesPerCoarseClass))
+
+        def apply(coarseFeatures: INDArrayLayer) = {
+          Dense2(coarseFeatures) dot weight + bias
+        }
+      })
+
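+      // Total loss = coarse cross-entropy + a fine-grained term: either only
+      // the branch matching `expectedCoarseLabel`, or a softmax over the
+      // scores of all fine-grained branches (with zero targets for the
+      // unmatched ones), depending on `excludeUnmatchedFineGrainedNetwork`.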
+      def loss(expectedCoarseLabel: Int, batch: Batch, excludeUnmatchedFineGrainedNetwork: Boolean): DoubleLayer = {
+        def crossEntropy(prediction: INDArrayLayer, expectOutput: INDArray): DoubleLayer = {
+          -(hyperparameters.log(prediction) * expectOutput).mean
+        }
+
+        val Array(batchSize, width, height, channels) = batch.pixels.shape()
+        val coarseFeatures = CoarseFeatures(batch.pixels.reshape(batchSize, width * height * channels))
+        val coarseProbabilities = CoarseProbabilityModel(coarseFeatures)
+
+        crossEntropy(coarseProbabilities, batch.coarseClasses) + {
+          if (excludeUnmatchedFineGrainedNetwork) {
+            val fineScores = fineScoreModels(expectedCoarseLabel)(coarseFeatures)
+            val expScores = exp(fineScores)
+            val fineProbabilities = expScores / expScores.sum(1)
+            crossEntropy(fineProbabilities, batch.localFineClasses)
+          } else {
+            val expScoresByCoarseLabel = for (coarseLabel <- 0 until Cifar100.NumberOfCoarseClasses) yield {
+              val fineScores = fineScoreModels(coarseLabel)(coarseFeatures)
+              exp(fineScores)
+            }
+            val expSum = expScoresByCoarseLabel.map(_.sum(1)).reduce(_ + _)
+            val lossPerCoarseLabel = for ((expScores, coarseLabel) <- expScoresByCoarseLabel.zipWithIndex) yield {
+              val fineProbabilities = expScores / expSum
+
+              crossEntropy(
+                fineProbabilities,
+                if (coarseLabel == expectedCoarseLabel) {
+                  batch.localFineClasses
+                } else {
+                  Nd4j.zeros(batchSize, Cifar100.NumberOfFineClassesPerCoarseClass)
+                }
+              )
+            }
+            lossPerCoarseLabel.reduce(_ + _)
+          }
+        }
+      }
+
+      def train(coarseLabel: Int, batch: Batch, excludeUnmatchedFineGrainedNetwork: Boolean) = {
+        loss(coarseLabel, batch, excludeUnmatchedFineGrainedNetwork).train
+      }
+
+    }
+
+    private var model: Model = null
+
+    @Setup
+    final def setup(): Unit = {
+      computationGraph = new ComputationGraph(deeplearning4jConf)
+      computationGraph.init()
+
+      executionContext = ExecutionContext.fromExecutorService(Executors.newFixedThreadPool(sizeOfThreadPool))
+      model = new Model
+    }
+
+    @TearDown
+    final def tearDown(): Unit = {
+      model = null
+      executionContext.shutdown()
+      executionContext = null
+      computationGraph = null
+    }
+
+    @Benchmark
+    final def deeplearning4j(): Double = {
+      val (coarseClass, batch) = batches.synchronized {
+        batches.next()
+      }
+
+      val dataset = new MultiDataSet()
+
+      val pixels2d = batch.pixels2d
+
+      dataset.setFeatures(Array(pixels2d))
+
+      val coarseLabels = Nd4j.zeros(1, Cifar100.NumberOfCoarseClasses)
+      coarseLabels.put(0, coarseClass, 1.0)
+
+      val broadcastCoarseLabels = coarseLabels.broadcast(pixels2d.rows(), Cifar100.NumberOfCoarseClasses)
+
+      val fineLabels = Nd4j.concat(
+        1,
+        (for (i <- 0 until Cifar100.NumberOfCoarseClasses) yield {
+          if (i == coarseClass) {
+            batch.localFineClasses
+          } else {
+            Nd4j.zeros(pixels2d.rows(), Cifar100.NumberOfFineClassesPerCoarseClass)
+          }
+        }): _*
+      )
+
+      dataset.setLabels(Array(broadcastCoarseLabels, fineLabels))
+
+      computationGraph.score(dataset, true)
+    }
+
+    @Benchmark
+    final def deepLearningDotScalaExcludeUnmatchedFineGrainedNetwork(): Double = {
+      val (coarseClass, batch) = batches.synchronized {
+        batches.next()
+      }
+      model.train(coarseClass, batch, true).blockingAwait
+    }
+
+    @Benchmark
+    final def deepLearningDotScala(): Double = {
+      val (coarseClass, batch) = batches.synchronized {
+        batches.next()
+      }
+      model.train(coarseClass, batch, false).blockingAwait
+    }
+
+  }
+
+}
diff --git a/build.sbt b/build.sbt
index aadd57d4..be76607e 100644
--- a/build.sbt
+++ b/build.sbt
@@ -144,6 +144,9 @@ lazy val `plugins-Builtins` =
     `plugins-CumulativeINDArrayLayers`,
     DeepLearning % "test->test"
   )
+
+lazy val benchmark = project.dependsOn(`plugins-Builtins`)
+
 publishArtifact := false
 
 lazy val unidoc =
diff --git a/docker-run.sh b/docker-run.sh
new file mode 100644
index 00000000..d59390c0
--- /dev/null
+++ b/docker-run.sh
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+docker run \
+  --volume /etc/passwd:/etc/passwd:ro \
+  --user "$(id -u)" \
+  --volume "$HOME:$HOME" \
+  --volume "$PWD:/mnt/project-root" \
+  --workdir /mnt/project-root \
+  --tty --interactive \
+  --init \
+  hseeberger/scala-sbt:8u141-jdk_2.12.3_0.13.16 \
+  "$@"
diff --git a/project/plugins.sbt b/project/plugins.sbt
index 28622b58..4eff112d 100644
--- a/project/plugins.sbt
+++ b/project/plugins.sbt
@@ -1,3 +1,5 @@
 addSbtPlugin("com.thoughtworks.sbt-best-practice" % "sbt-best-practice" % "2.5.0")
 
 addSbtPlugin("com.thoughtworks.example" % "sbt-example" % "2.0.2")
+
+addSbtPlugin("pl.project13.scala" % "sbt-jmh" % "0.3.4")
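+
+// With sbt-jmh enabled, the benchmarks above can be run from the sbt shell,
+// for example:
+//   benchmark/jmh:run -i 10 -wi 5 -f 1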