From d767a31038afecdeb50e3195b1628df5f0f609dc Mon Sep 17 00:00:00 2001 From: Anisha Kulkarni Date: Wed, 8 Sep 2021 18:09:03 -0700 Subject: [PATCH] Add support for QuantizedLSTM operation This change also adds support for dequantization for GNA as GNAPlugin does not support Convert operation. TEST=asr-perf-eval runs successfully for encoder 0 and encoder 1 --- BUILD.gn | 6 +- BasePreparedModel.cpp | 43 +- ModelManager.cpp | 22 +- ModelManager.h | 7 + gna/GnaPreparedModel.cpp | 16 +- .../include/NgraphNetworkCreator.hpp | 1 + ngraph_creator/include/NgraphNodes.hpp | 5 +- ngraph_creator/include/OperationsFactory.hpp | 1 + .../operations/include/OperationsBase.hpp | 74 ++- .../operations/include/QuantizedLSTM.hpp | 40 ++ .../operations/src/OperationsBase.cpp | 4 + .../operations/src/QuantizedLSTM.cpp | 471 ++++++++++++++++++ ngraph_creator/src/NgraphNetworkCreator.cpp | 46 +- ngraph_creator/src/NgraphNodes.cpp | 9 +- ngraph_creator/src/OperationsFactory.cpp | 2 + utils.h | 1 + 16 files changed, 717 insertions(+), 31 deletions(-) create mode 100644 ngraph_creator/operations/include/QuantizedLSTM.hpp create mode 100644 ngraph_creator/operations/src/QuantizedLSTM.cpp diff --git a/BUILD.gn b/BUILD.gn index 7b73d1ad5..806ca569d 100755 --- a/BUILD.gn +++ b/BUILD.gn @@ -118,6 +118,7 @@ shared_library("intel_nnhal") { "ngraph_creator/operations/src/Pad_V2.cpp", "ngraph_creator/operations/src/Pow.cpp", "ngraph_creator/operations/src/Quantize.cpp", + "ngraph_creator/operations/src/QuantizedLSTM.cpp", "ngraph_creator/operations/src/Reduce_All.cpp", "ngraph_creator/operations/src/Reduce_Any.cpp", "ngraph_creator/operations/src/Reduce_Max.cpp", @@ -194,9 +195,10 @@ shared_library("intel_nnhal") { "nnapi-support", "ngraph", "inference_engine", - "nn-common", + "nn-common", "ssl", - "crypto" + "crypto", + "MKLDNNPlugin" ] lib_dirs = [ "${sysroot}/usr/local/deployment_tools/inference_engine/lib/intel64/", diff --git a/BasePreparedModel.cpp b/BasePreparedModel.cpp index 3c2513aff..58f39f429 100755 --- a/BasePreparedModel.cpp +++ b/BasePreparedModel.cpp @@ -180,6 +180,8 @@ void asyncExecute(const Request& request, MeasureTiming measure, BasePreparedMod operandType == OperandType::TENSOR_QUANT8_SYMM || operandType == OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL) expectedLength /= 4; // 8bit expected instead of 32bit + else if(operandType == OperandType::TENSOR_QUANT16_SYMM) + expectedLength /= 2; // 16bit expected instead of 32bit if (rActualLength != expectedLength) { ALOGE("%s Invalid length at outIndex(%d) Actual:%d Expected:%d", __func__, outIndex, rActualLength, expectedLength); @@ -203,12 +205,26 @@ void asyncExecute(const Request& request, MeasureTiming measure, BasePreparedMod break; } case OperandType::TENSOR_QUANT8_ASYMM: { - floatToUint8(srcBlob->buffer().as(), (uint8_t*)destPtr, srcBlob->size()); + modelInfo->getOperandScaleZeroPoint(outIndex, sc, zp); + for (int i = 0; i < srcBlob->size() ; i++) { + *((uint8_t*)destPtr + i) = static_cast(zp + (*(srcBlob->buffer().as() + i) / sc)); + } break; } case OperandType::TENSOR_QUANT8_SYMM: + case OperandType::TENSOR_QUANT8_ASYMM_SIGNED: case OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL: { - floatToint8(srcBlob->buffer().as(), (int8_t*)destPtr, srcBlob->size()); + modelInfo->getOperandScaleZeroPoint(outIndex, sc, zp); + for (int i = 0; i < srcBlob->size() ; i++) { + *((int8_t*)destPtr + i) = static_cast(zp + (*(srcBlob->buffer().as() + i) / sc)); + } + break; + } + case OperandType::TENSOR_QUANT16_SYMM: { + modelInfo->getOperandScaleZeroPoint(outIndex, sc, zp);
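+ // Re-quantize the FP32 result blob element-wise into the QUANT16_SYMM output buffer using the operand's scale and zero-point: q = zeroPoint + value / scale (zeroPoint is 0 for symmetric operands).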
+ for (int i = 0; i < srcBlob->size() ; i++) { + *((int16_t*)destPtr + i) = static_cast(zp + (*(srcBlob->buffer().as() + i) / sc)); + } break; } default: @@ -295,9 +311,12 @@ static std::tuple, Timing> executeSynch auto outDims = srcBlob->getTensorDesc().getDims(); if (operandType == OperandType::TENSOR_BOOL8 || operandType == OperandType::TENSOR_QUANT8_ASYMM || + operandType == OperandType::TENSOR_QUANT8_ASYMM_SIGNED || operandType == OperandType::TENSOR_QUANT8_SYMM || operandType == OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL) expectedLength /= 4; // 8bit expected instead of 32bit + else if(operandType == OperandType::TENSOR_QUANT16_SYMM) + expectedLength /= 2; // 16bit expected instead of 32bit if (rActualLength != expectedLength) { ALOGE("%s Invalid length(%d) at outIndex(%d)", __func__, rActualLength, outIndex); // Notify Insufficient Buffer Length to modelInfo @@ -305,6 +324,8 @@ static std::tuple, Timing> executeSynch return {ErrorStatus::OUTPUT_INSUFFICIENT_SIZE, modelInfo->getOutputShapes(), kNoTiming}; } else modelInfo->updateOutputshapes(i, outDims); + float sc; + int32_t zp; switch (operandType) { case OperandType::TENSOR_INT32: case OperandType::TENSOR_FLOAT32: { @@ -317,12 +338,26 @@ static std::tuple, Timing> executeSynch break; } case OperandType::TENSOR_QUANT8_ASYMM: { - floatToUint8(srcBlob->buffer().as(), (uint8_t*)destPtr, srcBlob->size()); + modelInfo->getOperandScaleZeroPoint(outIndex, sc, zp); + for (int i = 0; i < srcBlob->size() ; i++) { + *((uint8_t*)destPtr + i) = static_cast(zp + (*(srcBlob->buffer().as() + i) / sc)); + } break; } case OperandType::TENSOR_QUANT8_SYMM: + case OperandType::TENSOR_QUANT8_ASYMM_SIGNED: case OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL: { - floatToint8(srcBlob->buffer().as(), (int8_t*)destPtr, srcBlob->size()); + modelInfo->getOperandScaleZeroPoint(outIndex, sc, zp); + for (int i = 0; i < srcBlob->size() ; i++) { + *((int8_t*)destPtr + i) = static_cast(zp + (*(srcBlob->buffer().as() + i) / sc)); + } + break; + } + case OperandType::TENSOR_QUANT16_SYMM: { + modelInfo->getOperandScaleZeroPoint(outIndex, sc, zp); + for (int i = 0; i < srcBlob->size() ; i++) { + *((int16_t*)destPtr + i) = static_cast(zp + (*(srcBlob->buffer().as() + i) / sc)); + } break; } default: diff --git a/ModelManager.cpp b/ModelManager.cpp index 3ec23e1b6..38eee117e 100755 --- a/ModelManager.cpp +++ b/ModelManager.cpp @@ -66,7 +66,10 @@ bool NnapiModelInfo::initializeRunTimeOperandInfo() { case OperandType::TENSOR_QUANT8_ASYMM: case OperandType::TENSOR_QUANT8_SYMM: case OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL: + case OperandType::TENSOR_QUANT8_ASYMM_SIGNED: + case OperandType::TENSOR_QUANT16_SYMM: to.type = from.type; + to.scale = from.scale; break; default: ALOGE("wrong operand type %d", from.type); @@ -284,7 +287,8 @@ Blob::Ptr NnapiModelInfo::GetInOutOperandAsBlob(RunTimeOperandInfo& op, const ui return blob; } } else if (op.type == OperandType::TENSOR_QUANT8_SYMM || - op.type == OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL) { + op.type == OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL || + op.type == OperandType::TENSOR_QUANT8_ASYMM_SIGNED) { ALOGV( "check if tensors of type TENSOR_QUANT8_SYMM/TENSOR_QUANT8_SYMM_PER_CHANNEL " "supported"); @@ -302,6 +306,22 @@ Blob::Ptr NnapiModelInfo::GetInOutOperandAsBlob(RunTimeOperandInfo& op, const ui return blob; } } + else if (op.type == OperandType::TENSOR_QUANT16_SYMM) { + ALOGV("check if tensors of type TENSOR_QUANT16_SYMM supported"); + InferenceEngine::TensorDesc td(InferenceEngine::Precision::I16, 
toDims(op.dimensions), + InferenceEngine::Layout::ANY); + if (buf == nullptr) { + ALOGD("TENSOR_QUANT16_SYMM buf is NULL !!!!!!!!!!!!!!!"); + InferenceEngine::TBlob::Ptr blob = + std::make_shared>(td); + blob->allocate(); + return blob; + } else { + InferenceEngine::TBlob::Ptr blob = + std::make_shared>(td, (int16_t*)buf, len); + return blob; + } + } return nullptr; } diff --git a/ModelManager.h b/ModelManager.h index 8d9feff74..33f1dbe8c 100755 --- a/ModelManager.h +++ b/ModelManager.h @@ -104,6 +104,13 @@ class NnapiModelInfo { return operand.zeroPoint; } + void getOperandScaleZeroPoint(int index, float& scale, int32_t& zp) { + auto operand = getOperand(index); + scale = operand.scale; + zp = operand.zeroPoint; + return; + } + RunTimeOperandInfo& getRuntimeOperand(uint32_t index) { return mOperands[mModel.main.inputIndexes[index]]; } diff --git a/gna/GnaPreparedModel.cpp b/gna/GnaPreparedModel.cpp index d9e8cf50f..b24c2ac5f 100755 --- a/gna/GnaPreparedModel.cpp +++ b/gna/GnaPreparedModel.cpp @@ -39,15 +39,21 @@ bool GnaPreparedModel::initialize(const Model& model) { ALOGE("%s ngraph generation failed", __func__); return false; } - auto ngraph_net = std::make_shared(ngraph_function); + try { + auto ngraph_net = std::make_shared(ngraph_function); #if __ANDROID__ - ngraph_net->serialize("/data/vendor/neuralnetworks/ngraph_ir.xml", + ngraph_net->serialize("/data/vendor/neuralnetworks/ngraph_ir.xml", "/data/vendor/neuralnetworks/ngraph_ir.bin"); #else - ngraph_net->serialize("/tmp/ngraph_ir.xml", "/tmp/ngraph_ir.bin"); + ngraph_net->serialize("/tmp/ngraph_ir.xml", "/tmp/ngraph_ir.bin"); #endif - mPlugin = std::make_shared(ngraph_net); - mPlugin->loadNetwork(); + mPlugin = std::make_shared(ngraph_net); + mPlugin->loadNetwork(); + } catch (const std::exception& ex) { + ALOGE("%s Exception !!! %s", __func__, ex.what()); + return false; + } + ALOGV("Exiting %s", __func__); return true; diff --git a/ngraph_creator/include/NgraphNetworkCreator.hpp b/ngraph_creator/include/NgraphNetworkCreator.hpp index c2ff98d6a..af212abbd 100644 --- a/ngraph_creator/include/NgraphNetworkCreator.hpp +++ b/ngraph_creator/include/NgraphNetworkCreator.hpp @@ -17,6 +17,7 @@ class NgraphNetworkCreator { std::vector> mOperationNodes; std::shared_ptr mNgraphNodes; OperationsFactory mOpFactoryInstance; + const IntelDeviceType mPluginType; bool createInputParams(); bool initializeModel(); diff --git a/ngraph_creator/include/NgraphNodes.hpp b/ngraph_creator/include/NgraphNodes.hpp index a82791838..1f24788dd 100644 --- a/ngraph_creator/include/NgraphNodes.hpp +++ b/ngraph_creator/include/NgraphNodes.hpp @@ -17,7 +17,8 @@ class NgraphNodes { // in the path to current Operand. std::vector mForcedNchw; std::vector> mInputParams; - std::vector> mResultNodes; + std::vector> mResultNodes; + std::vector> mSinkNodes; // mNodeNames are only populated when requested, as only Inputs and Result NodeNames are // required. 
std::map mNodeNames; @@ -30,6 +31,8 @@ class NgraphNodes { void setOutputAtOperandIndex(size_t index, ngraph::Output output); ngraph::Output getOperationOutput(size_t index); void setResultNode(size_t outputIndex, std::shared_ptr resultNode); + void setSinkNode(std::shared_ptr sinkNode); + const std::string& getNodeName(size_t index); void removeInputParameter(std::string name, size_t index); diff --git a/ngraph_creator/include/OperationsFactory.hpp b/ngraph_creator/include/OperationsFactory.hpp index b0cd08fc8..16b2ebcb6 100644 --- a/ngraph_creator/include/OperationsFactory.hpp +++ b/ngraph_creator/include/OperationsFactory.hpp @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include diff --git a/ngraph_creator/operations/include/OperationsBase.hpp b/ngraph_creator/operations/include/OperationsBase.hpp index e637d0827..4ab83ffca 100644 --- a/ngraph_creator/operations/include/OperationsBase.hpp +++ b/ngraph_creator/operations/include/OperationsBase.hpp @@ -37,6 +37,7 @@ class OperationsBase { // override createNodeForPlugin in case sPluginType specific implementation is required virtual std::shared_ptr createNodeForPlugin(); void addResultNode(size_t index, std::shared_ptr resultNode); + void addSinkNode(std::shared_ptr sinkNode); // helper functions bool checkOperandType(uint32_t operandIndex, const int32_t expectedOperandType, @@ -46,12 +47,30 @@ class OperationsBase { const vec getInputOperandDimensions(uint32_t inputIndex); bool isValidInputTensor(uint32_t inputIndex); + template + bool deQuantize(const T* inputData, const uint32_t& len, const float scale, + const int32_t zeroPoint, float* outputData) { + int32_t value; + for (int i = 0; i < len; ++i) { + value = *(inputData + i); + outputData[i] = static_cast(scale * (value - zeroPoint)); + } + return true; + } + std::shared_ptr getInputNode(uint32_t inputIndex, bool dequantize = true) { std::shared_ptr input; auto operandIndex = sModelInfo->getOperationInput(mNnapiOperationIndex, inputIndex); auto operandType = sModelInfo->getOperandType(operandIndex); + float scale; + int32_t zp; if (sModelInfo->isOperandLifeTimeConst(operandIndex)) { auto operandDims = getInputOperandDimensions(inputIndex); + std::vector f_operandValues; + + if (sPluginType == IntelDeviceType::GNA) { + sModelInfo->getOperandScaleZeroPoint(operandIndex, scale, zp); + } ngraph::element::Type elementType; switch (operandType) { case OperandType::TENSOR_FLOAT32: { @@ -61,9 +80,16 @@ class OperationsBase { break; } case OperandType::TENSOR_INT32: { - elementType = ngraph::element::i32; auto operandValues = sModelInfo->GetConstVecOperand(operandIndex); - input = createConstNode(elementType, toNgraphShape(operandDims), operandValues); + if (sPluginType == IntelDeviceType::GNA) { + elementType = ngraph::element::f32; + f_operandValues.resize(operandValues.size()); + deQuantize(operandValues.data(), operandValues.size(), scale, zp, f_operandValues.data()); + } + else { + elementType = ngraph::element::i32; + input = createConstNode(elementType, toNgraphShape(operandDims), operandValues); + } break; } case OperandType::TENSOR_BOOL8: { @@ -73,16 +99,44 @@ class OperationsBase { break; } case OperandType::TENSOR_QUANT8_ASYMM: { - elementType = ngraph::element::u8; auto operandValues = sModelInfo->GetConstVecOperand(operandIndex); - input = createConstNode(elementType, toNgraphShape(operandDims), operandValues); + if (sPluginType == IntelDeviceType::GNA) { + elementType = ngraph::element::f32; + f_operandValues.resize(operandValues.size()); + 
deQuantize(operandValues.data(), operandValues.size(), scale, zp, f_operandValues.data()); + } + else { + elementType = ngraph::element::u8; + input = createConstNode(elementType, toNgraphShape(operandDims), operandValues); + } break; } case OperandType::TENSOR_QUANT8_SYMM: + case OperandType::TENSOR_QUANT8_ASYMM_SIGNED: case OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL: { - elementType = ngraph::element::i8; auto operandValues = sModelInfo->GetConstVecOperand(operandIndex); - input = createConstNode(elementType, toNgraphShape(operandDims), operandValues); + if (sPluginType == IntelDeviceType::GNA) { + elementType = ngraph::element::f32; + f_operandValues.resize(operandValues.size()); + deQuantize(operandValues.data(), operandValues.size(), scale, zp, f_operandValues.data()); + } + else { + elementType = ngraph::element::i8; + input = createConstNode(elementType, toNgraphShape(operandDims), operandValues); + } + break; + } + case OperandType::TENSOR_QUANT16_SYMM: { + auto operandValues = sModelInfo->GetConstVecOperand(operandIndex); + if (sPluginType == IntelDeviceType::GNA) { + elementType = ngraph::element::f32; + f_operandValues.resize(operandValues.size()); + deQuantize(operandValues.data(), operandValues.size(), scale, zp, f_operandValues.data()); + } + else { + elementType = ngraph::element::i16; + input = createConstNode(elementType, toNgraphShape(operandDims), operandValues); + } break; } default: { @@ -91,12 +145,14 @@ class OperationsBase { return nullptr; } } - + if (sPluginType == IntelDeviceType::GNA && operandType != OperandType::TENSOR_FLOAT32) { + input = createConstNode(elementType, toNgraphShape(operandDims), f_operandValues); + } } else { input = mNgraphNodes->getOperationOutput(operandIndex).get_node_shared_ptr(); } - - if (operandType == OperandType::TENSOR_QUANT8_ASYMM && dequantize) { + if (operandType != OperandType::TENSOR_FLOAT32 && dequantize + && sPluginType != IntelDeviceType::GNA && !sModelInfo->isOperandLifeTimeTemp(operandIndex)) { input = DequantizeNode(input, operandIndex, ngraph::element::f32); } diff --git a/ngraph_creator/operations/include/QuantizedLSTM.hpp b/ngraph_creator/operations/include/QuantizedLSTM.hpp new file mode 100644 index 000000000..944ee478b --- /dev/null +++ b/ngraph_creator/operations/include/QuantizedLSTM.hpp @@ -0,0 +1,40 @@ +#pragma once + +#include + +namespace android { +namespace hardware { +namespace neuralnetworks { +namespace nnhal { + +class QuantizedLSTM : public OperationsBase { +public: + QuantizedLSTM(int operationIndex); + bool validate() override; + std::shared_ptr createNode() override; + void connectOperationToGraph() override; + + std::shared_ptr add(const ngraph::Output& lhs, + const ngraph::Output& rhs); + std::shared_ptr sub(const ngraph::Output& lhs, + const ngraph::Output& rhs); + std::shared_ptr mul(const ngraph::Output& lhs, + const ngraph::Output& rhs); + std::shared_ptr matMul(const ngraph::Output& lhs, + const ngraph::Output& rhs, + bool transpose_lhs, bool transpose_rhs); + std::shared_ptr clip(const ngraph::Output& data, + float m_clip) const; + std::shared_ptr applyActivation(const std::shared_ptr& arg, + int activationFn) const; + std::shared_ptr LayerNorm(const ngraph::Output& input, + const std::shared_ptr& normalizedweights, + const std::shared_ptr& bias); + + bool isValidInputTensor(uint32_t inputIndex); +}; + +} // namespace nnhal +} // namespace neuralnetworks +} // namespace hardware +} // namespace android \ No newline at end of file diff --git a/ngraph_creator/operations/src/OperationsBase.cpp 
b/ngraph_creator/operations/src/OperationsBase.cpp index b7c66d785..dc96cdb7b 100755 --- a/ngraph_creator/operations/src/OperationsBase.cpp +++ b/ngraph_creator/operations/src/OperationsBase.cpp @@ -66,6 +66,10 @@ void OperationsBase::addResultNode(size_t index, std::shared_ptr r mNgraphNodes->setResultNode(index, resultNode); } +void OperationsBase::addSinkNode(std::shared_ptr sinkNode) { + mNgraphNodes->setSinkNode(sinkNode); +} + OperationsBase::OperationsBase(int operationIndex) : mNnapiOperationIndex(operationIndex) { mDefaultOutputIndex = 0; } diff --git a/ngraph_creator/operations/src/QuantizedLSTM.cpp b/ngraph_creator/operations/src/QuantizedLSTM.cpp new file mode 100644 index 000000000..157a27793 --- /dev/null +++ b/ngraph_creator/operations/src/QuantizedLSTM.cpp @@ -0,0 +1,471 @@ +//#define LOG_NDEBUG 0 +#include +#define LOG_TAG "Quantized_LSTM" + +namespace android { +namespace hardware { +namespace neuralnetworks { +namespace nnhal { + +#define ACTIVATION_FUNCTION_NONE 0 +#define ACTIVATION_FUNCTION_RELU 1 +#define ACTIVATION_FUNCTION_RELU6 3 +#define ACTIVATION_FUNCTION_TANH 4 +#define ACTIVATION_FUNCTION_SIGMOID 6 + +QuantizedLSTM::QuantizedLSTM(int operationIndex) : OperationsBase(operationIndex) { + mDefaultOutputIndex = sModelInfo->getOperationOutput(mNnapiOperationIndex, 0); +} + +bool QuantizedLSTM::validate() { + // Check all Output types + if (!checkOutputOperandType(0, (int32_t)OperandType::TENSOR_QUANT8_ASYMM_SIGNED)) return false; + if (!checkOutputOperandType(1, (int32_t)OperandType::TENSOR_QUANT16_SYMM)) return false; + if (!checkOutputOperandType(2, (int32_t)OperandType::TENSOR_QUANT8_ASYMM_SIGNED)) return false; + + const auto& inputsSize = sModelInfo->getOperationInputsSize(mNnapiOperationIndex); + const auto& outputsSize = sModelInfo->getOperationOutputsSize(mNnapiOperationIndex); + + if (inputsSize != 32) { + return false; + } + + if (outputsSize != 3) return false; + + // check 0, 18, 19 input values + if (!checkInputOperandType(0, (int32_t)OperandType::TENSOR_QUANT8_ASYMM_SIGNED)) return false; + if (!checkInputOperandType(18, (int32_t)OperandType::TENSOR_QUANT8_ASYMM_SIGNED)) return false; + if (!checkInputOperandType(19, (int32_t)OperandType::TENSOR_QUANT16_SYMM)) return false; + + // check input type for 2 to 4, 6 to 8 + for (int i = 2; i <= 4; i++) { + if (!checkInputOperandType(i, (int32_t)OperandType::TENSOR_QUANT8_SYMM)) return false; + } + for (int i = 6; i <= 8; i++) { + if (!checkInputOperandType(i, (int32_t)OperandType::TENSOR_QUANT8_SYMM)) return false; + } + + // check input type for 13,14,15 + for (int i = 13; i <= 15; i++) { + if (!checkInputOperandType(i, (int32_t)OperandType::TENSOR_INT32)) return false; + } + + if (!sModelInfo->isOmittedInput(mNnapiOperationIndex, 1) && + !sModelInfo->isOmittedInput(mNnapiOperationIndex, 5) && + !sModelInfo->isOmittedInput(mNnapiOperationIndex, 12)) { + // CIFG diabled, check input types + if (!checkInputOperandType(1, (int32_t)OperandType::TENSOR_QUANT8_SYMM)) return false; + if (!checkInputOperandType(5, (int32_t)OperandType::TENSOR_QUANT8_SYMM)) return false; + if (!checkInputOperandType(12, (int32_t)OperandType::TENSOR_INT32)) return false; + } + + if (!sModelInfo->isOmittedInput(mNnapiOperationIndex, 9) && + !sModelInfo->isOmittedInput(mNnapiOperationIndex, 10) && + !sModelInfo->isOmittedInput(mNnapiOperationIndex, 11)) { + // peephole enabled, check input types + if (!checkInputOperandType(9, (int32_t)OperandType::TENSOR_QUANT16_SYMM)) return false; + if (!checkInputOperandType(10, 
(int32_t)OperandType::TENSOR_QUANT16_SYMM)) return false; + if (!checkInputOperandType(11, (int32_t)OperandType::TENSOR_QUANT16_SYMM)) return false; + } + + if (!sModelInfo->isOmittedInput(mNnapiOperationIndex, 20) && + !sModelInfo->isOmittedInput(mNnapiOperationIndex, 21) && + !sModelInfo->isOmittedInput(mNnapiOperationIndex, 22) && + !sModelInfo->isOmittedInput(mNnapiOperationIndex, 23)) { + // Layer Normalization present + if (!checkInputOperandType(20, (int32_t)OperandType::TENSOR_QUANT16_SYMM)) return false; + if (!checkInputOperandType(21, (int32_t)OperandType::TENSOR_QUANT16_SYMM)) return false; + if (!checkInputOperandType(22, (int32_t)OperandType::TENSOR_QUANT16_SYMM)) return false; + if (!checkInputOperandType(23, (int32_t)OperandType::TENSOR_QUANT16_SYMM)) return false; + } + + ALOGV("%s PASSED", __func__); + return true; +} + +void QuantizedLSTM::connectOperationToGraph() { createNode(); } + +std::shared_ptr QuantizedLSTM::createNode() { + + const auto& inputsSize = sModelInfo->getOperationInputsSize(mNnapiOperationIndex); + + bool isCIFGenabled = false, isPeepholeUsed = false, isProjectionUsed = false, + isLayerNormUsed = false, isCifgDimsEmpty = true; + + // checking if CIFG enabled + if (sModelInfo->isOmittedInput(mNnapiOperationIndex, 1) && + sModelInfo->isOmittedInput(mNnapiOperationIndex, 5) && + sModelInfo->isOmittedInput(mNnapiOperationIndex, 12)) { + isCIFGenabled = true; + } else { + if (isValidInputTensor(1) && isValidInputTensor(5) && isValidInputTensor(12)) + isCIFGenabled = false; + else + isCIFGenabled = true; + } + + // checking if peephole enabled + if (sModelInfo->isOmittedInput(mNnapiOperationIndex, 9) && + sModelInfo->isOmittedInput(mNnapiOperationIndex, 10) && + sModelInfo->isOmittedInput(mNnapiOperationIndex, 11)) { + isPeepholeUsed = false; + } else { + if (!isCIFGenabled && !isValidInputTensor(9) && isValidInputTensor(10) && + isValidInputTensor(11)) { + isCIFGenabled = true; + isCifgDimsEmpty = false; + } + if (isCIFGenabled) { + if (isValidInputTensor(10) && isValidInputTensor(11)) + isPeepholeUsed = true; + else + isPeepholeUsed = false; + } else { + if (isValidInputTensor(9) && isValidInputTensor(10) && isValidInputTensor(11)) + isPeepholeUsed = true; + else + isPeepholeUsed = false; + } + } + + // checking if projection enabled + if (sModelInfo->isOmittedInput(mNnapiOperationIndex, 16)) { + isProjectionUsed = false; + } else { + if (isValidInputTensor(16)) + isProjectionUsed = true; + else + isProjectionUsed = false; + } + + // checking if layer normalization enabled + if (sModelInfo->isOmittedInput(mNnapiOperationIndex, 20) && + sModelInfo->isOmittedInput(mNnapiOperationIndex, 21) && + sModelInfo->isOmittedInput(mNnapiOperationIndex, 22) && + sModelInfo->isOmittedInput(mNnapiOperationIndex, 23)) { + isLayerNormUsed = false; + } else { + if (isCIFGenabled) { + if (isValidInputTensor(21) && isValidInputTensor(22) && isValidInputTensor(23)) + isLayerNormUsed = true; + else + isLayerNormUsed = false; + } else { + if (isValidInputTensor(20) && isValidInputTensor(21) && isValidInputTensor(22) && + isValidInputTensor(23)) + isLayerNormUsed = true; + else + isLayerNormUsed = false; + } + } + + std::shared_ptr inputNode, input2input_weights, input2forget_weights, + input2cell_weights, input2output_weights, recurrent2input_weights, recurrent2forget_weights, + recurrent2cell_weights, recurrent2output_weights, cell2input_weights, cell2forget_weights, + cell2output_weights, input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias, + projection_weights, 
projection_bias; + uint32_t activationFn; + float cell_state_clipping, proj_clipping; + + const auto& inputNode_dims = getInputOperandDimensions(0); + const auto& initial_hidden_state_dims = getInputOperandDimensions(18); + const auto& initial_cell_state_dims = getInputOperandDimensions(19); + + auto batch_size = inputNode_dims[0]; + auto input_size = inputNode_dims[1]; + auto num_units = initial_cell_state_dims[1]; + auto output_size = initial_hidden_state_dims[1]; + + // Creating input nodes + inputNode = getInputNode(0); + const auto& elementType = inputNode->get_element_type(); + ngraph::element::Type cellElementType = ngraph::element::f32; + // W_{xi}, W_{xf}, W_{xc}, W_{xo} + if (isCIFGenabled) { + if (!isCifgDimsEmpty) removeInputNode(1); + } else { + input2input_weights = getInputNode(1); + } + input2forget_weights = getInputNode(2); + input2cell_weights = getInputNode(3); + input2output_weights = getInputNode(4); + + // W_{hi}, W_{hf}, W_{hc}, W_{ho} + if (isCIFGenabled) { + if (!isCifgDimsEmpty) removeInputNode(5); + } else { + recurrent2input_weights = getInputNode(5); + } + recurrent2forget_weights = getInputNode(6); + recurrent2cell_weights = getInputNode(7); + recurrent2output_weights = getInputNode(8); + + std::vector init_hidden(output_size, 0.0f); + std::vector init_cell(num_units, 0.0f); + static int assign_read_count = 0; + auto constant_hidden = std::make_shared(ngraph::element::f32, ngraph::Shape{1, output_size}, + init_hidden); + auto constant_cell = std::make_shared(ngraph::element::f32, ngraph::Shape{1, num_units}, + init_cell); + + auto read_value_hidden = std::make_shared(constant_hidden, "variable_hidden_" + std::to_string(assign_read_count)); + auto read_value_cell = std::make_shared(constant_cell, "variable_cell"+ std::to_string(assign_read_count)); + assign_read_count++; + + // W_{ci}, W_{cf}, W_{co} + if (isPeepholeUsed) { + if (isCIFGenabled) + cell2input_weights = + createConstNode(cellElementType, ngraph::Shape{num_units}, convertToVector(0)); + else + cell2input_weights = getInputNode(9); + cell2forget_weights = getInputNode(10); + cell2output_weights = getInputNode(11); + } else { + cell2input_weights = + createConstNode(cellElementType, ngraph::Shape{1, num_units}, convertToVector(0)); + cell2forget_weights = + createConstNode(cellElementType, ngraph::Shape{1, num_units}, convertToVector(0)); + cell2output_weights = + createConstNode(cellElementType, ngraph::Shape{1, num_units}, convertToVector(0)); + } + + // b_i, b_f, b_c, b_o + if (isCIFGenabled) { + if (!isCifgDimsEmpty) removeInputNode(12); + } else { + input_gate_bias = getInputNode(12); + } + forget_gate_bias = getInputNode(13); + cell_bias = getInputNode(14); + output_gate_bias = getInputNode(15); + + // W_{proj}, b_{proj} + if (isProjectionUsed) { + projection_weights = getInputNode(16); + if (isValidInputTensor(17)) + projection_bias = getInputNode(17); + else + projection_bias = + createConstNode(elementType, ngraph::Shape{output_size}, convertToVector(0)); + } + + cell_state_clipping = sModelInfo->ParseOperationInput(mNnapiOperationIndex, 24); + + if (isProjectionUsed) + proj_clipping = sModelInfo->ParseOperationInput(mNnapiOperationIndex, 25); + + std::shared_ptr i_t, f_t, c_t, o_t; + + std::shared_ptr input_layer_norm_weights, forget_layer_norm_weights, + cell_layer_norm_weights, output_layer_norm_weights; + if (isLayerNormUsed) { + if (!isCIFGenabled) input_layer_norm_weights = getInputNode(20); + forget_layer_norm_weights = getInputNode(21); + cell_layer_norm_weights = getInputNode(22); 
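+ // Inputs 20-23 hold the per-gate layer-normalization weights; input 20 (input gate) is skipped when CIFG is enabled.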
+ output_layer_norm_weights = getInputNode(23); + } + + // i_t = W_{xi}x_t+W_{hi}h_{t-1}+W_{ci}C_{t-1} + if (!isCIFGenabled) + i_t = add(add(matMul(inputNode, input2input_weights, false, true), + matMul(read_value_hidden, recurrent2input_weights, false, true)), + mul(cell2input_weights, read_value_cell)); + + // f_t = W_{xf}x_t+W_{hf}h_{t-1}+W_{cf}C_{t-1} + f_t = add(add(matMul(inputNode, input2forget_weights, false, true), + matMul(read_value_hidden, recurrent2forget_weights, false, true)), + mul(cell2forget_weights, read_value_cell)); + // c_t = W_{xc}x_t+W_{hc}h_{t-1} + c_t = add(matMul(inputNode, input2cell_weights, false, true), + matMul(read_value_hidden, recurrent2cell_weights, false, true)); + // o_t = W_{xo}x_t+W_{ho}h_{t-1} + o_t = add(matMul(inputNode, input2output_weights, false, true), + matMul(read_value_hidden, recurrent2output_weights, false, true)); + + /* ################# Update Forget Gate ################# */ + if (isLayerNormUsed) { + f_t = LayerNorm(f_t, forget_layer_norm_weights, forget_gate_bias); + } else { + // W_{xf}x_t + W_{hf}h_{t-1} + W_{cf}C_{t-1} + b_f + f_t = add(f_t, forget_gate_bias); + } + // sigma(W_{xf}x_t + W_{hf}h_{t-1} + W_{cf}C_{t-1} + b_f) + f_t = applyActivation(f_t, ACTIVATION_FUNCTION_SIGMOID); + + /* ################# Update Input Gate ################# */ + if (isCIFGenabled) { + auto constNode = createConstNode(elementType, f_t->get_shape(), convertToVector(1.f)); + // Couple input with forget gate: 1 - i_f + i_t = sub(constNode, f_t); + } else { + if (isLayerNormUsed) { + i_t = LayerNorm(i_t, input_layer_norm_weights, input_gate_bias); + } else { + // W_{xi}x_t + W_{hi}h_{t-1} + W_{ci}C_{t-1} + b_i + i_t = add(i_t, input_gate_bias); + } + // sigma(W_{xi}x_t + W_{hi}h_{t-1} + W_{ci}C_{t-1} + b_i) + i_t = applyActivation(i_t, ACTIVATION_FUNCTION_SIGMOID); + } + + /* ################# Update Cell Gate ################# */ + + if (isLayerNormUsed) { + c_t = LayerNorm(c_t, cell_layer_norm_weights, cell_bias); + } else { + // W_{xc}x_t+W_{hc}h_{t-1}+b_c + c_t = add(c_t, cell_bias); + } + // g(W_{xc}x_t+W_{hc}h_{t-1}+b_c) + c_t = applyActivation(c_t, ACTIVATION_FUNCTION_TANH); + + // ft (.) Ct-1 + it (.) ct + auto C = add(mul(f_t, read_value_cell), mul(i_t, c_t)); + // clip(ft (.) Ct-1 + it (.) 
ct, t_{cell}) + C = clip(C, cell_state_clipping); + + /* ################# Update Output Gate ################# */ + + // W_{xo}x_t+W_{ho}h_{t-1}+W_{co}C_t + o_t = add(o_t, mul(cell2output_weights, C)); + if (isLayerNormUsed) { + o_t = LayerNorm(o_t, output_layer_norm_weights, output_gate_bias); + } else { + // W_{xo}x_t+W_{ho}h_{t-1}+W_{co}C_t+b_o + o_t = add(o_t, output_gate_bias); + } + + // sigma(W_{xo}x_t+W_{ho}h_{t-1}+W_{co}C_t+b_o) + o_t = applyActivation(o_t, ACTIVATION_FUNCTION_SIGMOID); + + std::shared_ptr H; + if (isProjectionUsed) { + // o_t odot g(C_t) + auto dotProd = mul(o_t, applyActivation(C, ACTIVATION_FUNCTION_TANH)); + // W_{proj}(o_t odot g(C_t)) + auto projWeightsProduct = matMul(projection_weights, dotProd, false, true); + // W_{proj}(o_t odot g(C_t))+b_{proj} + auto projBiasAdd = add(transpose(NC_CN, projWeightsProduct), projection_bias); + // clip(W_{proj}(o_t odot g(C_t))+b_{proj}, t_{proj}) + H = clip(projBiasAdd, proj_clipping); + } else { + // o_t odot g(C_t) + H = mul(o_t, applyActivation(C, ACTIVATION_FUNCTION_TANH)); + } + + std::vector> QLstmOutputs(3, nullptr); + QLstmOutputs[0] = H; + QLstmOutputs[1] = C; + QLstmOutputs[2] = H; + + auto assign_hidden = std::make_shared(H, read_value_hidden->get_variable_id()); + auto assign_cell = std::make_shared(C, read_value_cell->get_variable_id()); + assign_hidden->add_control_dependency(read_value_hidden); + assign_cell->add_control_dependency(read_value_cell); + addSinkNode(assign_hidden); + addSinkNode(assign_cell); + + for (int i = 0; i < 3; i++) { + auto outputIndex = sModelInfo->getOperationOutput(mNnapiOperationIndex, i); + mNgraphNodes->setOutputAtOperandIndex(outputIndex, QLstmOutputs[i]); + + const auto op = sModelInfo->getOperand(outputIndex); + if (op.lifetime == V1_3::OperandLifeTime::SUBGRAPH_OUTPUT) { + addResultNode(outputIndex, QLstmOutputs[i]); + } + } + + return nullptr; +} + +std::shared_ptr QuantizedLSTM::add(const ngraph::Output& lhs, + const ngraph::Output& rhs) { + return {make_shared(lhs, rhs, ngraph::op::AutoBroadcastType::NUMPY)}; +} + +std::shared_ptr QuantizedLSTM::sub(const ngraph::Output& lhs, + const ngraph::Output& rhs) { + return {make_shared(lhs, rhs, ngraph::op::AutoBroadcastType::NUMPY)}; +} + +std::shared_ptr QuantizedLSTM::mul(const ngraph::Output& lhs, + const ngraph::Output& rhs) { + return {make_shared(lhs, rhs, ngraph::op::AutoBroadcastType::NUMPY)}; +} + +std::shared_ptr QuantizedLSTM::matMul(const ngraph::Output& lhs, + const ngraph::Output& rhs, + bool transpose_lhs, bool transpose_rhs) { + return {make_shared(lhs, rhs, transpose_lhs, transpose_rhs)}; +} + +std::shared_ptr QuantizedLSTM::clip(const ngraph::Output& data, + float m_clip) const { + if (m_clip == 0.f) { + return data.get_node_shared_ptr(); + } + return make_shared(data, -m_clip, m_clip); +} + +std::shared_ptr QuantizedLSTM::applyActivation(const std::shared_ptr& arg, + int activationFn) const { + switch (activationFn) { + case ACTIVATION_FUNCTION_RELU: + return std::make_shared(arg); + break; + case ACTIVATION_FUNCTION_RELU6: + return std::make_shared(arg, 0, 6); + break; + case ACTIVATION_FUNCTION_TANH: + return std::make_shared(arg); + break; + case ACTIVATION_FUNCTION_SIGMOID: + return std::make_shared(arg); + break; + default: + return std::make_shared(arg); + } +} + +std::shared_ptr QuantizedLSTM::LayerNorm( + const ngraph::Output& input, + const std::shared_ptr& normalizationweights, + const std::shared_ptr& bias) { + // LayerNormalization + auto normalizationConstant = 
createConstNode(ngraph::element::f32, {}, convertToVector(1e-8f)); + auto axis = ngraph::op::Constant::create(ngraph::element::i32, {}, {-1}); + auto mean = std::make_shared(input, axis, true); + // x_i - mean_i + auto diff = sub(input, mean); + // (x_i - mean_i) ** 2 + auto multiply = mul(diff, diff); + // mean((x_i - mean_i) ** 2) + auto var = std::make_shared(multiply, axis, true); + // var_i + epsilon + auto add_var = add(var, normalizationConstant); + // sqrt(var_i + epsilon) + auto sqrt = std::make_shared(add_var); + // (x_i - mean_i) / sqrt(var_i + epsilon) + auto stddev_inv = std::make_shared(diff, sqrt); + // x_i_normalized * gamma + auto mul_norm_weights = mul(stddev_inv, normalizationweights); + // x_i_normalized * gamma + beta + auto output = add(mul_norm_weights, bias); + + return output; +} + +bool QuantizedLSTM::isValidInputTensor(uint32_t inputIndex) { + const auto& dims = getInputOperandDimensions(inputIndex); + if (dims.empty()) return false; + + if (dims[0] == 0) return false; + + return true; +} + +} // namespace nnhal +} // namespace neuralnetworks +} // namespace hardware +} // namespace android diff --git a/ngraph_creator/src/NgraphNetworkCreator.cpp b/ngraph_creator/src/NgraphNetworkCreator.cpp index c55ada48c..7535801ff 100644 --- a/ngraph_creator/src/NgraphNetworkCreator.cpp +++ b/ngraph_creator/src/NgraphNetworkCreator.cpp @@ -10,6 +10,7 @@ namespace nnhal { NgraphNetworkCreator::NgraphNetworkCreator(std::shared_ptr modelInfo, IntelDeviceType deviceType) : mModelInfo(modelInfo), + mPluginType(deviceType), mNgraphNodes(std::make_shared(mModelInfo->getOperandsSize(), mModelInfo->getModelOutputsSize())), mOpFactoryInstance(deviceType, mModelInfo, mNgraphNodes) { @@ -49,8 +50,14 @@ bool NgraphNetworkCreator::createInputParams() { break; case OperandType::INT32: case OperandType::TENSOR_INT32: - inputParam = std::make_shared( - ngraph::element::i32, ngraph::Shape(dims.begin(), dims.end())); + if (mPluginType == IntelDeviceType::GNA) { + inputParam = std::make_shared( + ngraph::element::f32, ngraph::Shape(dims.begin(), dims.end())); + } + else { + inputParam = std::make_shared( + ngraph::element::i32, ngraph::Shape(dims.begin(), dims.end())); + } ALOGV("createInputParams created inputIndex %d, type %d", i, nnapiOperand.type); break; @@ -62,18 +69,43 @@ bool NgraphNetworkCreator::createInputParams() { nnapiOperand.type); break; case OperandType::TENSOR_QUANT8_ASYMM: - inputParam = std::make_shared( - ngraph::element::u8, ngraph::Shape(dims.begin(), dims.end())); + if (mPluginType == IntelDeviceType::GNA) { + inputParam = std::make_shared( + ngraph::element::f32, ngraph::Shape(dims.begin(), dims.end())); + } + else { + inputParam = std::make_shared( + ngraph::element::u8, ngraph::Shape(dims.begin(), dims.end())); + } ALOGV("createInputParams created inputIndex %d, type %d", i, nnapiOperand.type); break; case OperandType::TENSOR_QUANT8_SYMM: - case OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL: - inputParam = std::make_shared( - ngraph::element::i8, ngraph::Shape(dims.begin(), dims.end())); + case OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL: + case OperandType::TENSOR_QUANT8_ASYMM_SIGNED: + if (mPluginType == IntelDeviceType::GNA) { + inputParam = std::make_shared( + ngraph::element::f32, ngraph::Shape(dims.begin(), dims.end())); + } + else { + inputParam = std::make_shared( + ngraph::element::i8, ngraph::Shape(dims.begin(), dims.end())); + } ALOGV("createInputParams created inputIndex %d, type %d", i, nnapiOperand.type); break; + case OperandType::TENSOR_QUANT16_SYMM: + if 
(mPluginType == IntelDeviceType::GNA) { + inputParam = std::make_shared( + ngraph::element::f32, ngraph::Shape(dims.begin(), dims.end())); + } + else { + inputParam = std::make_shared( + ngraph::element::i16, ngraph::Shape(dims.begin(), dims.end())); + } + ALOGE("createInputParams created inputIndex %d, type %d", i, + nnapiOperand.type); + break; default: ALOGE("createInputParams Failure at inputIndex %d, type %d", i, nnapiOperand.type); diff --git a/ngraph_creator/src/NgraphNodes.cpp b/ngraph_creator/src/NgraphNodes.cpp index f0aa8f734..16600bd6a 100644 --- a/ngraph_creator/src/NgraphNodes.cpp +++ b/ngraph_creator/src/NgraphNodes.cpp @@ -28,7 +28,11 @@ ngraph::Output NgraphNodes::getOperationOutput(size_t index) { void NgraphNodes::setResultNode(size_t outputIndex, std::shared_ptr resultNode) { ALOGD("setResultNode %zu", outputIndex); - mResultNodes.push_back(resultNode); + mResultNodes.push_back(std::make_shared(resultNode)); +} + +void NgraphNodes::setSinkNode(std::shared_ptr sinkNode) { + mSinkNodes.push_back(sinkNode); } const std::string& NgraphNodes::getNodeName(size_t index) { @@ -50,7 +54,8 @@ void NgraphNodes::removeInputParameter(std::string name, size_t index) { } std::shared_ptr NgraphNodes::generateGraph() { - return std::make_shared(mResultNodes, mInputParams); + ngraph::SinkVector sinks {mSinkNodes}; + return std::make_shared(mResultNodes, sinks, mInputParams); } void NgraphNodes::setInvalidNode(size_t index) { mNodeNames[index] = ""; } diff --git a/ngraph_creator/src/OperationsFactory.cpp b/ngraph_creator/src/OperationsFactory.cpp index f72f9ffb6..d5c13d625 100755 --- a/ngraph_creator/src/OperationsFactory.cpp +++ b/ngraph_creator/src/OperationsFactory.cpp @@ -109,6 +109,8 @@ std::shared_ptr OperationsFactory::getOperation( return std::make_shared(operationIndex); case OperationType::QUANTIZE: return std::make_shared(operationIndex); + case OperationType::QUANTIZED_LSTM: + return std::make_shared(operationIndex); case OperationType::REDUCE_ALL: return std::make_shared(operationIndex); case OperationType::REDUCE_ANY: diff --git a/utils.h b/utils.h index b7eaf57a3..313781ae6 100755 --- a/utils.h +++ b/utils.h @@ -24,6 +24,7 @@ #include #include #include +#include #include "Driver.h" #include "IENetwork.h" // May be move these out of utils??
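A minimal standalone sketch (not part of the patch) of the scale/zero-point arithmetic this change relies on: quantized constants and inputs are expanded to FP32 for GNA with real = scale * (q - zeroPoint), mirroring OperationsBase::deQuantize(), and FP32 results are written back to quantized output buffers with q = zeroPoint + real / scale, mirroring the output-copy loops in BasePreparedModel.cpp. The helper names and the example scale below are illustrative only.

```cpp
#include <cstdint>
#include <vector>

// real = scale * (q - zeroPoint), mirroring OperationsBase::deQuantize().
template <typename T>
std::vector<float> dequantize(const std::vector<T>& q, float scale, int32_t zeroPoint) {
    std::vector<float> out(q.size());
    for (size_t i = 0; i < q.size(); ++i)
        out[i] = scale * (static_cast<int32_t>(q[i]) - zeroPoint);
    return out;
}

// q = zeroPoint + real / scale, mirroring the quantized output-copy loops.
template <typename T>
std::vector<T> quantize(const std::vector<float>& real, float scale, int32_t zeroPoint) {
    std::vector<T> out(real.size());
    for (size_t i = 0; i < real.size(); ++i)
        out[i] = static_cast<T>(zeroPoint + real[i] / scale);
    return out;
}

int main() {
    // TENSOR_QUANT16_SYMM example: zeroPoint is always 0, scale is per-operand
    // (1/2048 is just an illustrative cell-state scale).
    const float scale = 1.0f / 2048.0f;
    std::vector<int16_t> cellState = {512, -1024, 2048};
    std::vector<float> asFloat = dequantize(cellState, scale, /*zeroPoint=*/0);
    std::vector<int16_t> roundTrip = quantize<int16_t>(asFloat, scale, 0);
    return roundTrip == cellState ? 0 : 1;  // exact round trip for these values
}
```

For TENSOR_QUANT16_SYMM operands the zero point is always 0, so the round trip is exact whenever the real value is an exact multiple of the scale.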