diff --git a/lib/mps/matrix.jl b/lib/mps/matrix.jl index c971a3218..a9d18fee6 100644 --- a/lib/mps/matrix.jl +++ b/lib/mps/matrix.jl @@ -211,7 +211,7 @@ function encode!(cmdbuf::MTLCommandBuffer, matmul::MPSMatrixMultiplication, left end """ - matMulMPS(a::MtlMatrix, b::MtlMatrix, c::MtlMatrix, alpha=1, beta=1, + matmul!(a::MtlMatrix, b::MtlMatrix, c::MtlMatrix, alpha=1, beta=1, transpose_left=false, transpose_right=false) A `MPSMatrixMultiplication` kernel thay computes: `c = alpha * op(a) * beta * op(b) + beta * C` diff --git a/lib/mps/matrixrandom.jl b/lib/mps/matrixrandom.jl index e4d80ae35..cca50a95a 100644 --- a/lib/mps/matrixrandom.jl +++ b/lib/mps/matrixrandom.jl @@ -16,7 +16,7 @@ export MPSMatrixRandomDistributionDescriptor @autoproperty distributionType::MPSMatrixRandomDistribution @autoproperty maximum::Float32 setter=setMaximum @autoproperty mean::Float32 setter=setMean - @autoproperty minimum::Float32 setter=setMimimum + @autoproperty minimum::Float32 setter=setMinimum @autoproperty standardDeviation::Float32 setter=setStandardDeviation end diff --git a/lib/mps/ndarray.jl b/lib/mps/ndarray.jl index d55b87600..2bdeec285 100644 --- a/lib/mps/ndarray.jl +++ b/lib/mps/ndarray.jl @@ -82,7 +82,7 @@ end end function MPSTemporaryNDArray(cmdbuf::MTLCommandBuffer, descriptor::MPSNDArrayDescriptor) - @objc [MPSNDTemporaryNDArray temporaryNDArrayWithCommandBuffer:cmdbuf::id{MTLCommandBuffer} + @objc [MPSTemporaryNDArray temporaryNDArrayWithCommandBuffer:cmdbuf::id{MTLCommandBuffer} descriptor:descriptor::id{MPSNDArrayDescriptor}]::id{MPSTemporaryNDArray} return obj end @@ -123,7 +123,7 @@ end return obj end else - function MPSNDArray(buffer::MTLBuffer, offset::UInt, descriptor::MPSNDArrayDescriptor) + function MPSNDArray(_::MTLBuffer, _::UInt, _::MPSNDArrayDescriptor) @assert false "Creating an MPSNDArray that shares data with user-provided MTLBuffer is only supported in macOS v15+" end end @@ -135,20 +135,18 @@ function MPSNDArray(arr::MtlArray{T,N}) where {T,N} return MPSNDArray(arr.data[], UInt(arr.offset), desc) end -function Metal.MtlArray(ndarr::MPSNDArray; storage = Metal.DefaultStorageMode) +function Metal.MtlArray(ndarr::MPSNDArray; storage = Metal.DefaultStorageMode, async = false) ndims = Int(ndarr.numberOfDimensions) arrsize = [lengthOfDimension(ndarr,i) for i in 0:ndims-1] T = convert(DataType, ndarr.dataType) arr = MtlArray{T,ndims,storage}(undef, reverse(arrsize)...) dev = device(arr) - cmdBuf = MTLCommandBuffer(global_queue(dev)) - - exportDataWithCommandBuffer(ndarr, cmdBuf, arr.data[], T, 0, collect(sizeof(T) .* reverse(strides(arr)))) - - commit!(cmdBuf) - wait_completed(cmdBuf) + cmdBuf = MTLCommandBuffer(global_queue(dev)) do cmdBuf + exportDataWithCommandBuffer(ndarr, cmdBuf, arr.data[], T, 0, collect(sizeof(T) .* reverse(strides(arr)))) + end + async || wait_completed(cmdBuf) return arr end diff --git a/lib/mtl/device.jl b/lib/mtl/device.jl index 9297fe344..0747006e4 100644 --- a/lib/mtl/device.jl +++ b/lib/mtl/device.jl @@ -91,7 +91,7 @@ MTLDevice(i::Integer) = devices()[i] # family # -export supports_family, is_m3, is_m2, is_m1 +export supports_family, is_m4, is_m3, is_m2, is_m1 @cenum MTLGPUFamily::NSInteger begin MTLGPUFamilyMetal3 = 5001 # Metal 3 support @@ -121,4 +121,7 @@ is_m1(dev::MTLDevice) = supports_family(dev, MTLGPUFamilyApple7) && !supports_family(dev, MTLGPUFamilyApple8) is_m2(dev::MTLDevice) = supports_family(dev, MTLGPUFamilyApple8) && !supports_family(dev, MTLGPUFamilyApple9) -is_m3(dev::MTLDevice) = supports_family(dev, MTLGPUFamilyApple9) +is_m3(dev::MTLDevice) = supports_family(dev, MTLGPUFamilyApple9) && + occursin("M3", String(dev.name)) +is_m4(dev::MTLDevice) = supports_family(dev, MTLGPUFamilyApple9) && + occursin("M4", String(dev.name))