Skip to content

Commit

Permalink
Add OpenVINO load model test
Browse files Browse the repository at this point in the history
Resolve merge conflicts

Typo fix
  • Loading branch information
rajatkrishna committed Aug 28, 2023
1 parent 7d1f21d commit 10f9851
Show file tree
Hide file tree
Showing 9 changed files with 83 additions and 26 deletions.
8 changes: 6 additions & 2 deletions src/main/scala/com/johnsnowlabs/ml/ai/RoBerta.scala
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,12 @@ import scala.collection.JavaConverters._

/** TensorFlow backend for '''RoBERTa''' and '''Longformer'''
*
* @param tensorflowWrapper
* tensorflowWrapper class
* @param tensorflowWrapper
* Model wrapper with TensorFlow Wrapper
* @param onnxWrapper
* Model wrapper with ONNX Wrapper
* @param openvinoWrapper
* Model wrapper with OpenVINO Wrapper
* @param sentenceStartTokenId
* special token id for `<s>`
* @param sentenceEndTokenId
Expand Down
5 changes: 4 additions & 1 deletion src/main/scala/com/johnsnowlabs/ml/ai/XlmRoberta.scala
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ import com.johnsnowlabs.ml.tensorflow.{TensorResources, TensorflowWrapper}
import com.johnsnowlabs.ml.util.{ModelArch, ONNX, Openvino, TensorFlow}
import com.johnsnowlabs.nlp.annotators.common._
import com.johnsnowlabs.nlp.{Annotation, AnnotatorType}
import org.intel.openvino.Tensor

import scala.collection.JavaConverters._

Expand Down Expand Up @@ -65,6 +64,10 @@ import scala.collection.JavaConverters._
*
* @param tensorflowWrapper
* XlmRoberta Model wrapper with TensorFlowWrapper
* @param onnxWrapper
* XlmRoberta Model wrapper with ONNX Wrapper
* @param openvinoWrapper
* XlmRoberta Model wrapper with OpenVINO Wrapper
* @param spp
* XlmRoberta SentencePiece model with SentencePieceWrapper
* @param caseSensitive
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,19 @@
/*
* Copyright 2017-2022 John Snow Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.johnsnowlabs.ml.openvino

import com.johnsnowlabs.util.FileHelper
Expand Down
38 changes: 24 additions & 14 deletions src/main/scala/com/johnsnowlabs/ml/openvino/OpenvinoWrapper.scala
Original file line number Diff line number Diff line change
@@ -1,8 +1,23 @@
/*
* Copyright 2017-2022 John Snow Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.johnsnowlabs.ml.openvino

import com.johnsnowlabs.ml.tensorflow.io.ChunkBytes
import com.johnsnowlabs.ml.tensorflow.sign.ModelSignatureConstants._
import com.johnsnowlabs.ml.tensorflow.sign.{ModelSignatureConstants, ModelSignatureManager}
import com.johnsnowlabs.ml.tensorflow.sign.ModelSignatureConstants
import com.johnsnowlabs.ml.util.{ONNX, Openvino, TensorFlow}
import com.johnsnowlabs.util.{FileHelper, ZipArchiveUtil}
import org.apache.commons.io.FileUtils
Expand Down Expand Up @@ -74,37 +89,30 @@ class OpenvinoWrapper(modelBytes: Array[Byte], weightsBytes: Array[Array[Byte]])
object OpenvinoWrapper {

private val logger: Logger = LoggerFactory.getLogger(this.getClass.toString)
private[OpenvinoWrapper] val core: Core = this.synchronized {
if (core == null) {
new Core()
} else {
core
}
}
private[OpenvinoWrapper] val core: Core = new Core

// number of bytes stored in each chunk/array
private val BUFFER_SIZE = 1024 * 1024

private val ModelSuffix = "_ov_model"

/** Reads models from supported file formats and exports them into OpenVINO Intermediate
* Representation (IR) format. The resulting framework-independent model representation
* consists of a model graph (.xml) and weights (.bin) files.
* Representation (IR) format. The resulting model representation consists of a model graph
* (.xml) and weights (.bin) files.
*
* @param modelPath
* Path to the source model
* @param targetPath
* Path to the converted model directory
* @param useBundle
* Read from a provided model bundle
* @param detectedEngine
* The detected model framework
* @param zipped
* Unpack the zipped model
*/
def convertToOpenvinoFormat(
modelPath: String,
targetPath: String,
detectedEngine: String,
useBundle: Boolean,
zipped: Boolean = true): Unit = {
val tmpFolder = Files
.createTempDirectory(UUID.randomUUID().toString.takeRight(12) + ModelSuffix)
Expand All @@ -126,6 +134,8 @@ object OpenvinoWrapper {
folder
case ONNX.name =>
Paths.get(folder, ONNX.modelName).toString
case _ =>
throw new Exception(s"Unsupported model framework ${detectedEngine}!")
}

val model: Model = core.read_model(srcModelPath)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,6 @@ object LoadExternalModel {

}

def detectEngine(modelPath: String, isEncoderDecoder: Boolean = false): String = {
}
def isOpenvinoModel(modelPath: String): Boolean = {
val modelXml = new File(modelPath, Openvino.modelXml)
val modelBin = new File(modelPath, Openvino.modelBin)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -450,7 +450,6 @@ trait ReadBertDLModel extends ReadTensorflowModel with ReadOnnxModel with ReadOp
val onnxWrapper =
readOnnxModel(path, spark, "_bert_onnx", zipped = true, useBundle = false, None)
instance.setModelIfNotSet(spark, None, Some(onnxWrapper), None)
}

case Openvino.name =>
val openvinoWrapper = readOpenvinoModel(path, spark, "_bert_openvino")
Expand Down Expand Up @@ -497,8 +496,7 @@ trait ReadBertDLModel extends ReadTensorflowModel with ReadOnnxModel with ReadOp
modelPath = localModelPath,
targetPath = tmpFolder,
detectedEngine = detectedEngine,
zipped = false,
useBundle = true)
zipped = false)
tmpFolder
}
val (ovWrapper: OpenvinoWrapper, tensorNames: Map[String, String]) =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -549,8 +549,7 @@ trait ReadRobertaDLModel extends ReadTensorflowModel with ReadOnnxModel with Rea
modelPath = localModelPath,
targetPath = tmpFolder,
detectedEngine = detectedEngine,
zipped = false,
useBundle = true)
zipped = false)
tmpFolder
}
val (ovWrapper: OpenvinoWrapper, tensorNames: Map[String, String]) =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -498,8 +498,7 @@ trait ReadXlmRobertaDLModel
modelPath = localModelPath,
targetPath = tmpFolder,
detectedEngine = detectedEngine,
zipped = false,
useBundle = true)
zipped = false)
tmpFolder
}
val (ovWrapper: OpenvinoWrapper, tensorNames: Map[String, String]) =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,36 @@ class BertEmbeddingsTestSpec extends AnyFlatSpec {
pipelineModel.transform(ddd)
}

// Loads the BERT test model with `useOpenvino = true`, fits a three-stage pipeline on a
// one-sentence DataFrame, saves it, reloads it and applies it to the same DataFrame.
// NOTE(review): there are no assertions on the transformed output; the test only checks
// that these steps complete without throwing.
"Bert Embeddings" should "correctly load custom model with OpenVINO" taggedAs SlowTest in {

  import ResourceHelper.spark.implicits._

  // Location the fitted pipeline is written to and read back from.
  val savedPipelinePath = "./tmp_bert_pipeline"

  val sentences =
    Seq("Something is weird on the notebooks, something is happening.").toDF("text")

  val documentAssembler = new DocumentAssembler().setInputCol("text").setOutputCol("document")

  val tokenStage = new Tokenizer().setInputCols(Array("document")).setOutputCol("token")

  val modelDir = "src/test/resources/tf-hub-bert/model"

  val bertStage = BertEmbeddings
    .loadSavedModel(modelDir, ResourceHelper.spark, useOpenvino = true)
    .setInputCols(Array("token", "document"))
    .setOutputCol("bert")
    .setStorageRef("ov_bert_test")

  val fittedPipeline = new Pipeline()
    .setStages(Array(documentAssembler, tokenStage, bertStage))
    .fit(sentences)

  // Round-trip through disk so the reloaded model, not the in-memory one, is exercised.
  fittedPipeline.write.overwrite().save(savedPipelinePath)
  val reloadedPipeline = PipelineModel.load(savedPipelinePath)

  reloadedPipeline.transform(sentences)
}

"Bert Embeddings" should "be aligned with custom tokens from Tokenizer" taggedAs SlowTest in {

import ResourceHelper.spark.implicits._
Expand Down

0 comments on commit 10f9851

Please sign in to comment.