Commit

JBAI-4393 [core, ndarray] Refactored memory management and array handling

Added manual NDArray handling, refactored existing operations to use the standard DataType enum instead of ArrayTypes, and optimized memory allocations across multiple modules.
dmitriyb committed Aug 21, 2024
1 parent f1a9296 commit f334632
Showing 11 changed files with 259 additions and 186 deletions.
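All of the hunks below apply one idiom: an operator looks up an optional ManualAllocatorContext in its coroutineContext and either borrows a destination array from it or falls back to a plain allocation. Here is a minimal sketch of that idiom. Only the getNDArray(type, strides, fillZeros) signature and ManualAllocatorContext.Key are taken from the hunks below; the stand-in types and the pooling logic are illustrative, not the real kinference implementation.

import kotlin.coroutines.CoroutineContext
import kotlin.coroutines.coroutineContext

// Illustrative stand-ins for the ndarray types; only the API shape matters here.
enum class DataType { FLOAT, INT }
class Strides(val shape: IntArray)
class NDArray(val type: DataType, val strides: Strides)

// Sketch of a manual allocator: a coroutine-context element that hands out
// reusable arrays so hot operator paths avoid a fresh allocation per call.
class ManualAllocatorContext : CoroutineContext.Element {
    companion object Key : CoroutineContext.Key<ManualAllocatorContext>
    override val key: CoroutineContext.Key<*> get() = Key

    private val pool = ArrayDeque<NDArray>()

    // Simplified: a real pool would match candidates on type and shape,
    // and honor fillZeros; both are ignored in this sketch.
    fun getNDArray(type: DataType, strides: Strides, fillZeros: Boolean = false): NDArray =
        pool.removeFirstOrNull() ?: NDArray(type, strides)
}

// The get-or-allocate idiom used by every refactored operator below:
suspend fun allocateOutput(strides: Strides): NDArray {
    val manualContext = coroutineContext[ManualAllocatorContext.Key]
    return manualContext?.getNDArray(DataType.FLOAT, strides, fillZeros = false)
        ?: NDArray(DataType.FLOAT, strides) // no context: plain allocation
}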

@@ -24,7 +24,7 @@ class KIModel(

     @OptIn(ExperimentalCoroutinesApi::class)
     private val dispatcher: CoroutineDispatcher = Dispatchers.Default.limitedParallelism(parallelismLimit)
-    private val modelArrayStorage: ModelArrayStorage = ModelArrayStorage(memoryLimiter)
+    private val modelArrayStorage: ModelArrayStorage = ModelArrayStorage(MemoryLimiters.DefaultManualAllocator)

     override fun addProfilingContext(name: String): ProfilingContext = ProfilingContext(name).apply { profiles.add(this) }
     override fun analyzeProfilingResults(): ProfileAnalysisEntry = profiles.analyze("Model $name")
@@ -44,7 +44,7 @@ class KIModel(
                 coreReserved = true
             }

-            when (memoryLimiter) {
+            when (MemoryLimiters.DefaultManualAllocator) {
                 MemoryLimiters.NoAllocator -> {
                     withContext(limiterContext) {
                         return@withContext graph.execute(input, contexts)

@@ -1,15 +1,17 @@
 package io.kinference.core.operators.layer.normalization

 import io.kinference.attribute.Attribute
-import io.kinference.core.data.tensor.KITensor
-import io.kinference.core.data.tensor.asONNXTensors
+import io.kinference.core.data.tensor.*
 import io.kinference.data.ONNXData
 import io.kinference.graph.Contexts
 import io.kinference.ndarray.arrays.*
+import io.kinference.ndarray.arrays.memory.ManualAllocatorContext
 import io.kinference.ndarray.arrays.pointers.*
 import io.kinference.operator.*
+import io.kinference.primitives.types.DataType
 import io.kinference.protobuf.message.AttributeProto.AttributeType
 import io.kinference.protobuf.message.TensorProto
+import kotlin.coroutines.coroutineContext
 import kotlin.math.sqrt

 sealed class EmbedLayerNormalization(
@@ -73,9 +75,12 @@ class EmbedLayerNormalizationVer1(

     private data class NormalizeResult(val output: FloatNDArray, val embeddingSum: FloatNDArray)

-    internal suspend fun createMaskIndices(mask: IntNDArray?, batchSize: Int, seqLen: Int): NumberNDArrayCore {
-        val maskIndices = MutableIntNDArray(intArrayOf(batchSize))
-        if (mask == null) return maskIndices
+    internal suspend fun createMaskIndices(mask: IntNDArray?, batchSize: Int, seqLen: Int, context: ManualAllocatorContext? = null): NumberNDArrayCore {
+        val strides = Strides(intArrayOf(batchSize))
+        val maskIndices = (context?.getNDArray(DataType.INT, strides) ?: MutableIntNDArray(strides)) as MutableIntNDArray
+
+        if (mask == null)
+            return maskIndices.also { it.fill(0) }

         val pointer = mask.array.pointer()
         val maskIndicesPointer = maskIndices.array.pointer()
@@ -95,12 +100,15 @@

     private suspend fun normalize(
         epsilon: Float, inputIds: IntNDArray, segmentIds: IntNDArray?, wordEmbed: FloatNDArray, posEmbed: FloatNDArray,
-        segmentEmbed: FloatNDArray?, gamma: FloatNDArray, beta: FloatNDArray, positionIds: IntNDArray?
+        segmentEmbed: FloatNDArray?, gamma: FloatNDArray, beta: FloatNDArray, positionIds: IntNDArray?, context: ManualAllocatorContext? = null
     ): NormalizeResult {
         val (batchSize, seqLen) = inputIds.shape
         val (_, hiddenSize) = wordEmbed.shape
-        val output = MutableFloatNDArray(intArrayOf(batchSize, seqLen, hiddenSize))
-        val embeddingSum = MutableFloatNDArray(intArrayOf(batchSize, seqLen, hiddenSize))
+
+        val outputStrides = Strides(intArrayOf(batchSize, seqLen, hiddenSize))
+
+        val output = (context?.getNDArray(DataType.FLOAT, outputStrides, fillZeros = false) ?: MutableFloatNDArray(outputStrides)) as MutableFloatNDArray
+        val embeddingSum = (context?.getNDArray(DataType.FLOAT, outputStrides, fillZeros = false) ?: MutableFloatNDArray(outputStrides)) as MutableFloatNDArray

         for (batch in 0 until batchSize) {
             val blockIdx = batch * seqLen
@@ -167,6 +175,8 @@
     }

     override suspend fun <D : ONNXData<*, *>> apply(contexts: Contexts<D>, inputs: List<KITensor?>): List<KITensor?> {
+        val manualContext = coroutineContext[ManualAllocatorContext.Key]
+
         val inputIds = inputs[0]!!.data as IntNDArray
         val segmentIds = inputs[1]?.data as IntNDArray?
         val wordEmbed = inputs[2]!!.data as FloatNDArray
@@ -177,8 +187,12 @@
         val mask = inputs.getOrNull(7)?.data as IntNDArray?
         val positionIds = inputs.getOrNull(8)?.data as IntNDArray?

-        val (normalized, embedSum) = normalize(epsilon, inputIds, segmentIds, wordEmbed, posEmbed, segmentEmbed, gamma, beta, positionIds)
+        val (normalized, embedSum) = normalize(epsilon, inputIds, segmentIds, wordEmbed, posEmbed, segmentEmbed, gamma, beta, positionIds, manualContext)
         val maskIndices = createMaskIndices(mask, inputIds.shape[0], inputIds.shape[1])
-        return listOf(normalized, maskIndices, embedSum).asONNXTensors(outputs)
+        return listOf(
+            normalized.asTensor(context = manualContext),
+            maskIndices.asTensor(context = manualContext),
+            embedSum.asTensor(context = manualContext)
+        )
     }
 }

@@ -7,10 +7,13 @@ import io.kinference.data.ONNXData
 import io.kinference.graph.Contexts
 import io.kinference.ndarray.arrays.FloatNDArray
 import io.kinference.ndarray.arrays.MutableFloatNDArray
+import io.kinference.ndarray.arrays.memory.ManualAllocatorContext
 import io.kinference.ndarray.arrays.pointers.*
 import io.kinference.operator.*
+import io.kinference.primitives.types.DataType
 import io.kinference.protobuf.message.AttributeProto
 import io.kinference.protobuf.message.TensorProto
+import kotlin.coroutines.coroutineContext
 import kotlin.math.sqrt

 sealed class SkipLayerNormalization(name: String, info: OperatorInfo, attributes: Map<String, Attribute<Any>>, inputs: List<String>, outputs: List<String>) : Operator<KITensor, KITensor>(name, info, attributes, inputs, outputs) {
@@ -104,8 +107,10 @@ class SkipLayerNormalizationVer1(name: String, attributes: Map<String, Attribute


     override suspend fun <D : ONNXData<*, *>> apply(contexts: Contexts<D>, inputs: List<KITensor?>): List<KITensor?> {
+        val manualContext = coroutineContext[ManualAllocatorContext.Key]
+
         val input = inputs[0]!!.data as FloatNDArray
-        val output = MutableFloatNDArray(input.strides)
+        val output = (manualContext?.getNDArray(DataType.FLOAT, input.strides, fillZeros = false) ?: MutableFloatNDArray(input.strides)) as MutableFloatNDArray
         input.normalize(
             skip = inputs[1]!!.data as FloatNDArray,
             gamma = inputs[2]!!.data as FloatNDArray,
@@ -114,6 +119,7 @@ class SkipLayerNormalizationVer1(name: String, attributes: Map<String, Attribute
             epsilon = epsilon,
             dst = output
         )
-        return listOf(output.asTensor())
+        // Do we need to pass context here??
+        return listOf(output.asTensor(context = manualContext))
     }
 }

@@ -5,9 +5,12 @@ import io.kinference.core.data.tensor.KITensor
 import io.kinference.core.data.tensor.asTensor
 import io.kinference.data.ONNXData
 import io.kinference.graph.Contexts
-import io.kinference.ndarray.arrays.NumberNDArrayCore
+import io.kinference.ndarray.arrays.*
+import io.kinference.ndarray.arrays.memory.ManualAllocatorContext
+import io.kinference.ndarray.extensions.allocateNDArray
 import io.kinference.operator.*
 import io.kinference.protobuf.message.TensorProto
+import kotlin.coroutines.coroutineContext

 sealed class Add(name: String, info: OperatorInfo, attributes: Map<String, Attribute<Any>>, inputs: List<String>, outputs: List<String>) : Operator<KITensor, KITensor>(name, info, attributes, inputs, outputs) {
     companion object {
@@ -52,7 +55,16 @@ class AddVer7(name: String, attributes: Map<String, Attribute<Any>>, inputs: Lis
     }

     override suspend fun <D : ONNXData<*, *>> apply(contexts: Contexts<D>, inputs: List<KITensor?>): List<KITensor?> {
-        val result = (inputs[0]!!.data as NumberNDArrayCore) + (inputs[1]!!.data as NumberNDArrayCore)
-        return listOf(result.asTensor("C"))
+        val manualContext = coroutineContext[ManualAllocatorContext.Key]
+
+        val left = inputs[0]!!.data as NumberNDArrayCore
+        val right = inputs[1]!!.data as NumberNDArrayCore
+
+        val destShape = broadcastShape(listOf(left.shape, right.shape))
+        val destStrides = Strides(destShape)
+        val dest = (manualContext?.getNDArray(left.type, destStrides) ?: allocateNDArray(left.type, destStrides)) as MutableNumberNDArrayCore
+
+        val result = left.plus(right, dest)
+        return listOf(result.asTensor("C", manualContext))
     }
 }
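For reference, broadcastShape above computes the element-wise output shape from the two operand shapes. A sketch of the rule it is assumed to follow (standard ONNX/numpy broadcasting); broadcastShapeSketch is a hypothetical name, the real helper lives in the ndarray extensions:

fun broadcastShapeSketch(shapes: List<IntArray>): IntArray {
    val rank = shapes.maxOf { it.size }
    return IntArray(rank) { i ->
        // Align at the trailing axis; missing leading dims count as 1.
        val dims = shapes.map { s -> s.getOrElse(s.size - rank + i) { 1 } }
        val dim = dims.max()
        require(dims.all { it == 1 || it == dim }) { "Shapes are not broadcastable" }
        dim
    }
}

fun main() {
    // [2, 1, 4] + [3, 1] broadcasts to [2, 3, 4].
    println(broadcastShapeSketch(listOf(intArrayOf(2, 1, 4), intArrayOf(3, 1))).contentToString())
}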

@@ -5,9 +5,13 @@ import io.kinference.core.data.tensor.KITensor
 import io.kinference.core.data.tensor.asTensor
 import io.kinference.data.ONNXData
 import io.kinference.graph.Contexts
+import io.kinference.ndarray.arrays.MutableNumberNDArrayCore
 import io.kinference.ndarray.arrays.NumberNDArrayCore
+import io.kinference.ndarray.arrays.memory.ManualAllocatorContext
+import io.kinference.ndarray.extensions.allocateNDArray
 import io.kinference.ndarray.extensions.gelu.biasGelu
 import io.kinference.operator.*
+import kotlin.coroutines.coroutineContext

 sealed class BiasGelu(name: String, info: OperatorInfo, attributes: Map<String, Attribute<Any>>, inputs: List<String>, outputs: List<String>) : Operator<KITensor, KITensor>(name, info, attributes, inputs, outputs) {
     companion object {
@@ -39,16 +43,20 @@ class BiasGeluVer1(name: String, attributes: Map<String, Attribute<Any>> = empty
     }

     override suspend fun <D : ONNXData<*, *>> apply(contexts: Contexts<D>, inputs: List<KITensor?>): List<KITensor?> {
+        val manualContext = coroutineContext[ManualAllocatorContext.Key]
+
         val input = inputs[0]!!.data as NumberNDArrayCore
         val bias = inputs[1]!!.data as NumberNDArrayCore

         require(input.shape.last() == bias.shape.last()) { "Last dimensions of input and bias tensors must be equal" }

+        val dest = (manualContext?.getNDArray(input.type, input.strides) ?: allocateNDArray(input.type, input.strides)) as MutableNumberNDArrayCore
+
         // Uses ERF formula with fractional error less than x.xx * 10 ^ -4.
         // Algorithm 26.2.17 in Abramowitz and Stegun, Handbook of Mathematical Functions.
         // Another possible ERF implementation (several ms faster):
         // https://github.com/apache/commons-numbers/blob/master/commons-numbers-gamma/src/main/java/org/apache/commons/numbers/gamma/BoostErf.java

-        return listOf(biasGelu(input, bias).asTensor("C"))
+        return listOf(biasGelu(input, bias, dest).asTensor("C", manualContext))
     }
 }

@@ -5,9 +5,13 @@ import io.kinference.core.data.tensor.KITensor
 import io.kinference.core.data.tensor.asTensor
 import io.kinference.data.ONNXData
 import io.kinference.graph.Contexts
-import io.kinference.ndarray.arrays.NumberNDArrayCore
+import io.kinference.ndarray.arrays.*
+import io.kinference.ndarray.arrays.memory.ManualAllocatorContext
 import io.kinference.ndarray.broadcasting.Broadcasting
+import io.kinference.ndarray.extensions.allocateNDArray
 import io.kinference.operator.*
 import io.kinference.protobuf.message.TensorProto
+import kotlin.coroutines.coroutineContext

 sealed class MatMul(name: String, info: OperatorInfo, attributes: Map<String, Attribute<Any>>, inputs: List<String>, outputs: List<String>) : Operator<KITensor, KITensor>(name, info, attributes, inputs, outputs) {
     companion object {
@@ -46,8 +50,16 @@ class MatMulVer1(name: String, attributes: Map<String, Attribute<Any>>, inputs:
     }

     override suspend fun <D : ONNXData<*, *>> apply(contexts: Contexts<D>, inputs: List<KITensor?>): List<KITensor?> {
+        val manualContext = coroutineContext[ManualAllocatorContext.Key]
+
         val first = inputs[0]!!.data as NumberNDArrayCore
         val second = inputs[1]!!.data as NumberNDArrayCore
-        return listOf((first.matmul(second)).asTensor("Y"))
+
+        val destShape = Broadcasting.broadcastShapeForMatmul(first.shape, second.shape)
+        val destStrides = Strides(destShape)
+
+        val dest = (manualContext?.getNDArray(first.type, destStrides, fillZeros = true) ?: allocateNDArray(first.type, destStrides)) as MutableNumberNDArrayCore
+
+        return listOf((first.matmul(second, dest)).asTensor("Y", manualContext))
     }
 }
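MatMul broadcasting differs from the element-wise case: the two trailing axes are matrix dimensions, (n, k) times (k, m) gives (n, m), and only the leading stack axes broadcast. A sketch under the assumption that Broadcasting.broadcastShapeForMatmul follows the ONNX MatMul rules; the sketch function name is hypothetical, and 1-D inputs are omitted for brevity:

fun broadcastShapeForMatmulSketch(left: IntArray, right: IntArray): IntArray {
    require(left.size >= 2 && right.size >= 2) { "1-D inputs omitted for brevity" }
    val (n, kLeft) = left.takeLast(2)
    val (kRight, m) = right.takeLast(2)
    require(kLeft == kRight) { "Inner dimensions must match" }

    // Leading "stack" axes broadcast numpy-style; missing dims count as 1.
    val leftStack = left.dropLast(2)
    val rightStack = right.dropLast(2)
    val rank = maxOf(leftStack.size, rightStack.size)
    val stack = IntArray(rank) { i ->
        val a = leftStack.getOrElse(leftStack.size - rank + i) { 1 }
        val b = rightStack.getOrElse(rightStack.size - rank + i) { 1 }
        require(a == 1 || b == 1 || a == b) { "Stack dims are not broadcastable" }
        maxOf(a, b)
    }
    return (stack.toList() + listOf(n, m)).toIntArray()
}

fun main() {
    // [2, 1, 3, 4] x [5, 4, 6] -> [2, 5, 3, 6]
    println(broadcastShapeForMatmulSketch(
        intArrayOf(2, 1, 3, 4), intArrayOf(5, 4, 6)
    ).contentToString())
}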