Commit

JBAI-4393 [core, ndarray] Refactored memory management and array handling

Added manual NDArray handling, refactored existing operations to use the standard DataType enum instead of ArrayTypes, and optimized memory allocations across multiple modules.
dmitriyb committed Aug 21, 2024
1 parent f1a9296 commit f334632
Showing 11 changed files with 259 additions and 186 deletions.
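All of the hunks below apply one idiom: an operator looks up an optional ManualAllocatorContext in its coroutineContext and either borrows a destination array from it or falls back to a plain allocation. Here is a minimal sketch of that idiom. Only the getNDArray(type, strides, fillZeros) signature and ManualAllocatorContext.Key are taken from the hunks below; the stand-in types and the pooling logic are illustrative, not the real kinference implementation.

import kotlin.coroutines.CoroutineContext
import kotlin.coroutines.coroutineContext

// Illustrative stand-ins for the ndarray types; only the API shape matters here.
enum class DataType { FLOAT, INT }
class Strides(val shape: IntArray)
class NDArray(val type: DataType, val strides: Strides)

// Sketch of a manual allocator: a coroutine-context element that hands out
// reusable arrays so hot operator paths avoid a fresh allocation per call.
class ManualAllocatorContext : CoroutineContext.Element {
    companion object Key : CoroutineContext.Key<ManualAllocatorContext>
    override val key: CoroutineContext.Key<*> get() = Key

    private val pool = ArrayDeque<NDArray>()

    // Simplified: a real pool would match candidates on type and shape,
    // and honor fillZeros; both are ignored in this sketch.
    fun getNDArray(type: DataType, strides: Strides, fillZeros: Boolean = false): NDArray =
        pool.removeFirstOrNull() ?: NDArray(type, strides)
}

// The get-or-allocate idiom used by every refactored operator below:
suspend fun allocateOutput(strides: Strides): NDArray {
    val manualContext = coroutineContext[ManualAllocatorContext.Key]
    return manualContext?.getNDArray(DataType.FLOAT, strides, fillZeros = false)
        ?: NDArray(DataType.FLOAT, strides) // no context: plain allocation
}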

@@ -24,7 +24,7 @@ class KIModel(

     @OptIn(ExperimentalCoroutinesApi::class)
     private val dispatcher: CoroutineDispatcher = Dispatchers.Default.limitedParallelism(parallelismLimit)
-    private val modelArrayStorage: ModelArrayStorage = ModelArrayStorage(memoryLimiter)
+    private val modelArrayStorage: ModelArrayStorage = ModelArrayStorage(MemoryLimiters.DefaultManualAllocator)

     override fun addProfilingContext(name: String): ProfilingContext = ProfilingContext(name).apply { profiles.add(this) }
     override fun analyzeProfilingResults(): ProfileAnalysisEntry = profiles.analyze("Model $name")
@@ -44,7 +44,7 @@ class KIModel(
                 coreReserved = true
             }

-            when (memoryLimiter) {
+            when (MemoryLimiters.DefaultManualAllocator) {
                 MemoryLimiters.NoAllocator -> {
                     withContext(limiterContext) {
                         return@withContext graph.execute(input, contexts)

@@ -1,15 +1,17 @@
 package io.kinference.core.operators.layer.normalization

 import io.kinference.attribute.Attribute
-import io.kinference.core.data.tensor.KITensor
-import io.kinference.core.data.tensor.asONNXTensors
+import io.kinference.core.data.tensor.*
 import io.kinference.data.ONNXData
 import io.kinference.graph.Contexts
 import io.kinference.ndarray.arrays.*
+import io.kinference.ndarray.arrays.memory.ManualAllocatorContext
 import io.kinference.ndarray.arrays.pointers.*
 import io.kinference.operator.*
+import io.kinference.primitives.types.DataType
 import io.kinference.protobuf.message.AttributeProto.AttributeType
 import io.kinference.protobuf.message.TensorProto
+import kotlin.coroutines.coroutineContext
 import kotlin.math.sqrt

 sealed class EmbedLayerNormalization(
@@ -73,9 +75,12 @@ class EmbedLayerNormalizationVer1(

     private data class NormalizeResult(val output: FloatNDArray, val embeddingSum: FloatNDArray)

-    internal suspend fun createMaskIndices(mask: IntNDArray?, batchSize: Int, seqLen: Int): NumberNDArrayCore {
-        val maskIndices = MutableIntNDArray(intArrayOf(batchSize))
-        if (mask == null) return maskIndices
+    internal suspend fun createMaskIndices(mask: IntNDArray?, batchSize: Int, seqLen: Int, context: ManualAllocatorContext? = null): NumberNDArrayCore {
+        val strides = Strides(intArrayOf(batchSize))
+        val maskIndices = (context?.getNDArray(DataType.INT, strides) ?: MutableIntNDArray(strides)) as MutableIntNDArray
+
+        if (mask == null)
+            return maskIndices.also { it.fill(0) }

         val pointer = mask.array.pointer()
         val maskIndicesPointer = maskIndices.array.pointer()
@@ -95,12 +100,15 @@

     private suspend fun normalize(
         epsilon: Float, inputIds: IntNDArray, segmentIds: IntNDArray?, wordEmbed: FloatNDArray, posEmbed: FloatNDArray,
-        segmentEmbed: FloatNDArray?, gamma: FloatNDArray, beta: FloatNDArray, positionIds: IntNDArray?
+        segmentEmbed: FloatNDArray?, gamma: FloatNDArray, beta: FloatNDArray, positionIds: IntNDArray?, context: ManualAllocatorContext? = null
     ): NormalizeResult {
         val (batchSize, seqLen) = inputIds.shape
         val (_, hiddenSize) = wordEmbed.shape
-        val output = MutableFloatNDArray(intArrayOf(batchSize, seqLen, hiddenSize))
-        val embeddingSum = MutableFloatNDArray(intArrayOf(batchSize, seqLen, hiddenSize))
+
+        val outputStrides = Strides(intArrayOf(batchSize, seqLen, hiddenSize))
+
+        val output = (context?.getNDArray(DataType.FLOAT, outputStrides, fillZeros = false) ?: MutableFloatNDArray(outputStrides)) as MutableFloatNDArray
+        val embeddingSum = (context?.getNDArray(DataType.FLOAT, outputStrides, fillZeros = false) ?: MutableFloatNDArray(outputStrides)) as MutableFloatNDArray

         for (batch in 0 until batchSize) {
             val blockIdx = batch * seqLen
@@ -167,6 +175,8 @@
     }

     override suspend fun <D : ONNXData<*, *>> apply(contexts: Contexts<D>, inputs: List<KITensor?>): List<KITensor?> {
+        val manualContext = coroutineContext[ManualAllocatorContext.Key]
+
         val inputIds = inputs[0]!!.data as IntNDArray
         val segmentIds = inputs[1]?.data as IntNDArray?
         val wordEmbed = inputs[2]!!.data as FloatNDArray
@@ -177,8 +187,12 @@
         val mask = inputs.getOrNull(7)?.data as IntNDArray?
         val positionIds = inputs.getOrNull(8)?.data as IntNDArray?

-        val (normalized, embedSum) = normalize(epsilon, inputIds, segmentIds, wordEmbed, posEmbed, segmentEmbed, gamma, beta, positionIds)
+        val (normalized, embedSum) = normalize(epsilon, inputIds, segmentIds, wordEmbed, posEmbed, segmentEmbed, gamma, beta, positionIds, manualContext)
         val maskIndices = createMaskIndices(mask, inputIds.shape[0], inputIds.shape[1])
-        return listOf(normalized, maskIndices, embedSum).asONNXTensors(outputs)
+        return listOf(
+            normalized.asTensor(context = manualContext),
+            maskIndices.asTensor(context = manualContext),
+            embedSum.asTensor(context = manualContext)
+        )
     }
 }

@@ -7,10 +7,13 @@ import io.kinference.data.ONNXData
 import io.kinference.graph.Contexts
 import io.kinference.ndarray.arrays.FloatNDArray
 import io.kinference.ndarray.arrays.MutableFloatNDArray
+import io.kinference.ndarray.arrays.memory.ManualAllocatorContext
 import io.kinference.ndarray.arrays.pointers.*
 import io.kinference.operator.*
+import io.kinference.primitives.types.DataType
 import io.kinference.protobuf.message.AttributeProto
 import io.kinference.protobuf.message.TensorProto
+import kotlin.coroutines.coroutineContext
 import kotlin.math.sqrt

 sealed class SkipLayerNormalization(name: String, info: OperatorInfo, attributes: Map<String, Attribute<Any>>, inputs: List<String>, outputs: List<String>) : Operator<KITensor, KITensor>(name, info, attributes, inputs, outputs) {
@@ -104,8 +107,10 @@ class SkipLayerNormalizationVer1(name: String, attributes: Map<String, Attribute


     override suspend fun <D : ONNXData<*, *>> apply(contexts: Contexts<D>, inputs: List<KITensor?>): List<KITensor?> {
+        val manualContext = coroutineContext[ManualAllocatorContext.Key]
+
         val input = inputs[0]!!.data as FloatNDArray
-        val output = MutableFloatNDArray(input.strides)
+        val output = (manualContext?.getNDArray(DataType.FLOAT, input.strides, fillZeros = false) ?: MutableFloatNDArray(input.strides)) as MutableFloatNDArray
         input.normalize(
             skip = inputs[1]!!.data as FloatNDArray,
             gamma = inputs[2]!!.data as FloatNDArray,
@@ -114,6 +119,7 @@ class SkipLayerNormalizationVer1(name: String, attributes: Map<String, Attribute
             epsilon = epsilon,
             dst = output
         )
-        return listOf(output.asTensor())
+        // Do we need to pass context here??
+        return listOf(output.asTensor(context = manualContext))
     }
 }

@@ -5,9 +5,12 @@ import io.kinference.core.data.tensor.KITensor
 import io.kinference.core.data.tensor.asTensor
 import io.kinference.data.ONNXData
 import io.kinference.graph.Contexts
-import io.kinference.ndarray.arrays.NumberNDArrayCore
+import io.kinference.ndarray.arrays.*
+import io.kinference.ndarray.arrays.memory.ManualAllocatorContext
+import io.kinference.ndarray.extensions.allocateNDArray
 import io.kinference.operator.*
 import io.kinference.protobuf.message.TensorProto
+import kotlin.coroutines.coroutineContext

 sealed class Add(name: String, info: OperatorInfo, attributes: Map<String, Attribute<Any>>, inputs: List<String>, outputs: List<String>) : Operator<KITensor, KITensor>(name, info, attributes, inputs, outputs) {
     companion object {
@@ -52,7 +55,16 @@ class AddVer7(name: String, attributes: Map<String, Attribute<Any>>, inputs: Lis
     }

     override suspend fun <D : ONNXData<*, *>> apply(contexts: Contexts<D>, inputs: List<KITensor?>): List<KITensor?> {
-        val result = (inputs[0]!!.data as NumberNDArrayCore) + (inputs[1]!!.data as NumberNDArrayCore)
-        return listOf(result.asTensor("C"))
+        val manualContext = coroutineContext[ManualAllocatorContext.Key]
+
+        val left = inputs[0]!!.data as NumberNDArrayCore
+        val right = inputs[1]!!.data as NumberNDArrayCore
+
+        val destShape = broadcastShape(listOf(left.shape, right.shape))
+        val destStrides = Strides(destShape)
+        val dest = (manualContext?.getNDArray(left.type, destStrides) ?: allocateNDArray(left.type, destStrides)) as MutableNumberNDArrayCore
+
+        val result = left.plus(right, dest)
+        return listOf(result.asTensor("C", manualContext))
     }
 }
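For reference, broadcastShape above computes the element-wise output shape from the two operand shapes. A sketch of the rule it is assumed to follow (standard ONNX/numpy broadcasting); broadcastShapeSketch is a hypothetical name, the real helper lives in the ndarray extensions:

fun broadcastShapeSketch(shapes: List<IntArray>): IntArray {
    val rank = shapes.maxOf { it.size }
    return IntArray(rank) { i ->
        // Align at the trailing axis; missing leading dims count as 1.
        val dims = shapes.map { s -> s.getOrElse(s.size - rank + i) { 1 } }
        val dim = dims.max()
        require(dims.all { it == 1 || it == dim }) { "Shapes are not broadcastable" }
        dim
    }
}

fun main() {
    // [2, 1, 4] + [3, 1] broadcasts to [2, 3, 4].
    println(broadcastShapeSketch(listOf(intArrayOf(2, 1, 4), intArrayOf(3, 1))).contentToString())
}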

@@ -5,9 +5,13 @@ import io.kinference.core.data.tensor.KITensor
 import io.kinference.core.data.tensor.asTensor
 import io.kinference.data.ONNXData
 import io.kinference.graph.Contexts
+import io.kinference.ndarray.arrays.MutableNumberNDArrayCore
 import io.kinference.ndarray.arrays.NumberNDArrayCore
+import io.kinference.ndarray.arrays.memory.ManualAllocatorContext
+import io.kinference.ndarray.extensions.allocateNDArray
 import io.kinference.ndarray.extensions.gelu.biasGelu
 import io.kinference.operator.*
+import kotlin.coroutines.coroutineContext

 sealed class BiasGelu(name: String, info: OperatorInfo, attributes: Map<String, Attribute<Any>>, inputs: List<String>, outputs: List<String>) : Operator<KITensor, KITensor>(name, info, attributes, inputs, outputs) {
     companion object {
@@ -39,16 +43,20 @@ class BiasGeluVer1(name: String, attributes: Map<String, Attribute<Any>> = empty
     }

     override suspend fun <D : ONNXData<*, *>> apply(contexts: Contexts<D>, inputs: List<KITensor?>): List<KITensor?> {
+        val manualContext = coroutineContext[ManualAllocatorContext.Key]
+
         val input = inputs[0]!!.data as NumberNDArrayCore
         val bias = inputs[1]!!.data as NumberNDArrayCore

         require(input.shape.last() == bias.shape.last()) { "Last dimensions of input and bias tensors must be equal" }

+        val dest = (manualContext?.getNDArray(input.type, input.strides) ?: allocateNDArray(input.type, input.strides)) as MutableNumberNDArrayCore
+
         // Uses ERF formula with fractional error less than x.xx * 10 ^ -4.
         // Algorithm 26.2.17 in Abramowitz and Stegun, Handbook of Mathematical Functions.
         // Another possible ERF implementation (several ms faster):
         // https://github.com/apache/commons-numbers/blob/master/commons-numbers-gamma/src/main/java/org/apache/commons/numbers/gamma/BoostErf.java

-        return listOf(biasGelu(input, bias).asTensor("C"))
+        return listOf(biasGelu(input, bias, dest).asTensor("C", manualContext))
     }
 }

@@ -5,9 +5,13 @@ import io.kinference.core.data.tensor.KITensor
 import io.kinference.core.data.tensor.asTensor
 import io.kinference.data.ONNXData
 import io.kinference.graph.Contexts
-import io.kinference.ndarray.arrays.NumberNDArrayCore
+import io.kinference.ndarray.arrays.*
+import io.kinference.ndarray.arrays.memory.ManualAllocatorContext
 import io.kinference.ndarray.broadcasting.Broadcasting
+import io.kinference.ndarray.extensions.allocateNDArray
 import io.kinference.operator.*
 import io.kinference.protobuf.message.TensorProto
+import kotlin.coroutines.coroutineContext

 sealed class MatMul(name: String, info: OperatorInfo, attributes: Map<String, Attribute<Any>>, inputs: List<String>, outputs: List<String>) : Operator<KITensor, KITensor>(name, info, attributes, inputs, outputs) {
     companion object {
@@ -46,8 +50,16 @@ class MatMulVer1(name: String, attributes: Map<String, Attribute<Any>>, inputs:
     }

     override suspend fun <D : ONNXData<*, *>> apply(contexts: Contexts<D>, inputs: List<KITensor?>): List<KITensor?> {
+        val manualContext = coroutineContext[ManualAllocatorContext.Key]
+
         val first = inputs[0]!!.data as NumberNDArrayCore
         val second = inputs[1]!!.data as NumberNDArrayCore
-        return listOf((first.matmul(second)).asTensor("Y"))
+
+        val destShape = Broadcasting.broadcastShapeForMatmul(first.shape, second.shape)
+        val destStrides = Strides(destShape)
+
+        val dest = (manualContext?.getNDArray(first.type, destStrides, fillZeros = true) ?: allocateNDArray(first.type, destStrides)) as MutableNumberNDArrayCore
+
+        return listOf((first.matmul(second, dest)).asTensor("Y", manualContext))
     }
 }
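MatMul broadcasting differs from the element-wise case: the two trailing axes are matrix dimensions, (n, k) times (k, m) gives (n, m), and only the leading stack axes broadcast. A sketch under the assumption that Broadcasting.broadcastShapeForMatmul follows the ONNX MatMul rules; the sketch function name is hypothetical, and 1-D inputs are omitted for brevity:

fun broadcastShapeForMatmulSketch(left: IntArray, right: IntArray): IntArray {
    require(left.size >= 2 && right.size >= 2) { "1-D inputs omitted for brevity" }
    val (n, kLeft) = left.takeLast(2)
    val (kRight, m) = right.takeLast(2)
    require(kLeft == kRight) { "Inner dimensions must match" }

    // Leading "stack" axes broadcast numpy-style; missing dims count as 1.
    val leftStack = left.dropLast(2)
    val rightStack = right.dropLast(2)
    val rank = maxOf(leftStack.size, rightStack.size)
    val stack = IntArray(rank) { i ->
        val a = leftStack.getOrElse(leftStack.size - rank + i) { 1 }
        val b = rightStack.getOrElse(rightStack.size - rank + i) { 1 }
        require(a == 1 || b == 1 || a == b) { "Stack dims are not broadcastable" }
        maxOf(a, b)
    }
    return (stack.toList() + listOf(n, m)).toIntArray()
}

fun main() {
    // [2, 1, 3, 4] x [5, 4, 6] -> [2, 5, 3, 6]
    println(broadcastShapeForMatmulSketch(
        intArrayOf(2, 1, 3, 4), intArrayOf(5, 4, 6)
    ).contentToString())
}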