From ac8a03e0a1408e4879473111205251b15f131e6d Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Wed, 27 Mar 2024 11:13:47 -0300 Subject: [PATCH 01/15] Add MPSMatrixRandom --- lib/mps/MPS.jl | 1 + lib/mps/matrixrandom.jl | 146 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 147 insertions(+) create mode 100644 lib/mps/matrixrandom.jl diff --git a/lib/mps/MPS.jl b/lib/mps/MPS.jl index 2bb794187..e33a156b9 100644 --- a/lib/mps/MPS.jl +++ b/lib/mps/MPS.jl @@ -28,6 +28,7 @@ include("kernel.jl") include("images.jl") include("matrix.jl") include("vector.jl") +include("matrixrandom.jl") include("decomposition.jl") include("copy.jl") diff --git a/lib/mps/matrixrandom.jl b/lib/mps/matrixrandom.jl new file mode 100644 index 000000000..0d5eb8741 --- /dev/null +++ b/lib/mps/matrixrandom.jl @@ -0,0 +1,146 @@ +@cenum MPSMatrixRandomDistribution::UInt begin + MPSMatrixRandomDistributionDefault = 1 + MPSMatrixRandomDistributionUniform = 2 + MPSMatrixRandomDistributionNormal = 3 +end + +# +# matrix random descriptor +# + +export MPSMatrixRandomDistributionDescriptor + +@objcwrapper immutable=false MPSMatrixRandomDistributionDescriptor <: NSObject + +@objcproperties MPSMatrixRandomDistributionDescriptor begin + @autoproperty distributionType::MPSMatrixRandomDistribution + @autoproperty maximum::Float32 setter=setMaximum + @autoproperty mean::Float32 setter=setMean + @autoproperty minimum::Float32 setter=setMinimum + @autoproperty standardDeviation::Float32 setter=setStandardDeviation +end + + +function MPSMatrixRandomDefaultDistributionDescriptor() + desc = @objc [MPSMatrixRandomDistributionDescriptor defaultDistributionDescriptor]::id{MPSMatrixRandomDistributionDescriptor} + obj = MPSMatrixRandomDistributionDescriptor(desc) + # XXX: who releases this object? 
+ return obj +end + +# Default constructor +MPSMatrixRandomDistributionDescriptor() = MPSMatrixRandomDefaultDistributionDescriptor() + +function MPSMatrixRandomNormalDistributionDescriptor(mean, standardDeviation) + desc = @objc [MPSMatrixRandomDistributionDescriptor normalDistributionDescriptorWithMean:mean::Float32 + standardDeviation:standardDeviation::Float32]::id{MPSMatrixRandomDistributionDescriptor} + obj = MPSMatrixRandomDistributionDescriptor(desc) + # XXX: who releases this object? + return obj +end + +function MPSMatrixRandomNormalDistributionDescriptor(mean, standardDeviation, minimum, maximum) + desc = @objc [MPSMatrixRandomDistributionDescriptor normalDistributionDescriptorWithMean:mean::Float32 + standardDeviation:standardDeviation::Float32 + minimum:minimum::Float32 + maximum:maximum::Float32]::id{MPSMatrixRandomDistributionDescriptor} + obj = MPSMatrixRandomDistributionDescriptor(desc) + # XXX: who releases this object? + return obj +end + +function MPSMatrixRandomUniformDistributionDescriptor(minimum, maximum) + desc = @objc [MPSMatrixRandomDistributionDescriptor uniformDistributionDescriptorWithMinimum:minimum::Float32 + maximum:maximum::Float32]::id{MPSMatrixRandomDistributionDescriptor} + obj = MPSMatrixRandomDistributionDescriptor(desc) + # XXX: who releases this object? 
+ return obj +end + + +@objcwrapper immutable=false MPSMatrixRandom <: MPSKernel + +@objcproperties MPSMatrixRandom begin + @autoproperty batchSize::NSUInteger + @autoproperty batchStart::NSUInteger + @autoproperty destinationDataType::MPSDataType + @autoproperty distributionType::MPSMatrixRandomDistribution +end + +function encode!(cmdbuf::MTLCommandBuffer, kernel::K, destinationMatrix::MPSMatrix) where {K<:MPSMatrixRandom} + @objc [kernel::id{K} encodeToCommandBuffer:cmdbuf::id{MTLCommandBuffer} + destinationMatrix:destinationMatrix::id{MPSMatrix}]::Nothing +end +function encode!(cmdbuf::MTLCommandBuffer, kernel::K, destinationVector::MPSVector) where {K<:MPSMatrixRandom} + @objc [kernel::id{K} encodeToCommandBuffer:cmdbuf::id{MTLCommandBuffer} + destinationVector:destinationVector::id{MPSVector}]::Nothing +end + +@objcwrapper immutable=false MPSMatrixRandomMTGP32 <: MPSMatrixRandom +@objcwrapper immutable=false MPSMatrixRandomPhilox <: MPSMatrixRandom + +for R in [:MPSMatrixRandomMTGP32, :MPSMatrixRandomPhilox] + @eval begin + function $R(device) + kernel = @objc [$R alloc]::id{$R} + obj = $R(kernel) + finalizer(release, obj) + @objc [obj::id{$R} initWithDevice:device::id{MTLDevice}]::id{$R} + return obj + end + function $R(device, destinationDataType, seed) + kernel = @objc [$R alloc]::id{$R} + obj = $R(kernel) + finalizer(release, obj) + @objc [obj::id{$R} initWithDevice:device::id{MTLDevice} + destinationDataType:destinationDataType::MPSDataType + seed:seed::NSUInteger]::id{$R} + return obj + end + function $R(device, destinationDataType, seed, distributionDescriptor) + kernel = @objc [$R alloc]::id{$R} + obj = $R(kernel) + finalizer(release, obj) + @objc [obj::id{$R} initWithDevice:device::id{MTLDevice} + destinationDataType:destinationDataType::MPSDataType + seed:seed::NSUInteger + distributionDescriptor:distributionDescriptor::id{MPSMatrixRandomDistributionDescriptor}]::id{$R} + return obj + end + end +end + 
+synchronizeStateOnCommandBuffer(kern::MPSMatrixRandomMTGP32, cmdbuf::MTLCommandBuffer) = + @objc [obj::id{MPSMatrixRandomMTGP32} synchronizeStateOnCommandBuffer:cmdbuf::id{MTLCommandBuffer}]::Nothing + + + +@inline function _mpsmat_rand!(randkern::MPSMatrixRandom, dest::MtlArray{T}, ::Type{T2}; + queue::MTLCommandQueue = global_queue(current_device()), + async::Bool=false) where {T,T2} + byteoffset = dest.offset * sizeof(T) + (byteoffset % 4 == 0) || error(lazy"Destination buffer offset ($(byteoffset)) must be a multiple of 4.") + + srcbytes = sizeof(dest) + + cmdbuf = if srcbytes % 16 == 0 && dest.offset == 0 + MTLCommandBuffer(queue) do cmdbuf + vecDesc = MPSVectorDescriptor(srcbytes ÷ sizeof(T2), T2) + mpsdest = MPSVector(dest, vecDesc) + encode!(cmdbuf, randkern, mpsdest) + end + else + MTLCommandBuffer(queue) do cmdbuf + len = UInt(ceil(srcbytes / sizeof(T2)) * 4) + vecDesc = MPSVectorDescriptor(len, T2) + tempVec = MPSTemporaryVector(cmdbuf, vecDesc) + encode!(cmdbuf, randkern, tempVec) + MTLBlitCommandEncoder(cmdbuf) do enc + MTL.append_copy!(enc, dest.data[], byteoffset, tempVec.data, tempVec.offset, srcbytes) + end + end + end + + async || wait_completed(cmdbuf) + return +end From f7bac7b04b67278a7fe38b30c44d204faa890a2c Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Tue, 16 Apr 2024 16:22:28 -0300 Subject: [PATCH 02/15] Support rand! 
and rand using MPS where appropriate --- docs/src/usage/array.md | 44 +++++++ lib/mps/MPS.jl | 1 + lib/mps/random.jl | 109 ++++++++++++++++ src/random.jl | 61 +++++++-- test/random.jl | 282 +++++++++++++++++++++++++++++++++++----- 5 files changed, 454 insertions(+), 43 deletions(-) create mode 100644 lib/mps/random.jl diff --git a/docs/src/usage/array.md b/docs/src/usage/array.md index 42e27db7b..0cf57f47a 100644 --- a/docs/src/usage/array.md +++ b/docs/src/usage/array.md @@ -3,6 +3,11 @@ ```@meta DocTestSetup = quote using Metal + + import Random + Random.seed!(0) + + Metal.seed!(0) end ``` @@ -106,3 +111,42 @@ julia> Base.mapreducedim!(identity, +, b, a) 1×1 MtlMatrix{Float32, Metal.PrivateStorage}: 6.0 ``` + +## Random numbers + +Base's convenience functions for generating random numbers are available in Metal as well: + +```jldoctest +julia> Metal.rand(2) +2-element MtlVector{Float32, Private}: + 0.39904642 + 0.8805201 + +julia> Metal.randn(Float32, 2, 1) +2×1 MtlMatrix{Float32, Private}: + -0.18797699 + -0.006818078 +``` + +Behind the scenes, these random numbers come from two different generators: one backed by +[Metal Performance Shaders](https://developer.apple.com/documentation/metalperformanceshaders/mpsmatrixrandom?language=objc), +another by using the GPUArrays.jl random methods. Operations on these generators are implemented using methods from the Random +standard library: + +```jldoctest +julia> using Random, GPUArrays + +julia> a = Random.rand(MPS.default_rng(), Float32, 1) +1-element MtlVector{Float32, Private}: + 0.39904642 + +julia> a = Random.rand!(GPUArrays.default_rng(MtlArray), a) +1-element MtlVector{Float32, Private}: + 0.13394515 +``` + +!!! note + `MPSMatrixRandom` functionality requires Metal.jl > v1.1 + +!!! 
warning + Do not use `Random.rand!(::MPS.RNG, args...)` or `Random.randn!(::MPS.RNG, args...)` on views as you will most likely overwrite values outside of the view due to limitations in random number generation in the Metal Performance Shaders framework. diff --git a/lib/mps/MPS.jl b/lib/mps/MPS.jl index e33a156b9..7266eae9a 100644 --- a/lib/mps/MPS.jl +++ b/lib/mps/MPS.jl @@ -33,6 +33,7 @@ include("decomposition.jl") include("copy.jl") # integrations +include("random.jl") include("linalg.jl") end diff --git a/lib/mps/random.jl b/lib/mps/random.jl new file mode 100644 index 000000000..e5b4c987d --- /dev/null +++ b/lib/mps/random.jl @@ -0,0 +1,109 @@ +using Random +using Metal: DefaultStorageMode + +""" + MPS.RNG() + +A random number generator backed by Metal Performance Shaders `MPSMatrixRandomPhilox` kernels. +""" +mutable struct RNG <: AbstractRNG + device::MTLDevice + uniformInteger::MPSMatrixRandomPhilox + uniformFloat32::MPSMatrixRandomPhilox + normalFloat32::MPSMatrixRandomPhilox +end + + +make_seed() = Base.rand(RandomDevice(), UInt) + +function RNG(device::MTLDevice, seed::Integer) + seed = seed%UInt + RNG(device, + MPSMatrixRandomPhilox(device, UInt32, seed, MPSMatrixRandomDefaultDistributionDescriptor()), + MPSMatrixRandomPhilox(device, Float32, seed, MPSMatrixRandomUniformDistributionDescriptor(0, 1)), + MPSMatrixRandomPhilox(device, Float32, seed, MPSMatrixRandomNormalDistributionDescriptor(0, 1)),) +end +@autoreleasepool RNG(seed::Integer) = RNG(current_device(), seed) +RNG(device::MTLDevice) = RNG(device, make_seed()) + +@autoreleasepool RNG() = RNG(current_device(), make_seed()) + +Base.copy(rng::RNG) = RNG(copy(rng.device), copy(rng.uniformInteger), copy(rng.uniformFloat32), copy(rng.normalFloat32)) + +@autoreleasepool function Random.seed!(rng::RNG, seed::Integer) + rng.uniformInteger = MPSMatrixRandomPhilox(rng.device, UInt32, seed, MPSMatrixRandomDefaultDistributionDescriptor()) + rng.uniformFloat32 = MPSMatrixRandomPhilox(rng.device, Float32, seed, 
MPSMatrixRandomUniformDistributionDescriptor(0, 1)) + rng.normalFloat32 = MPSMatrixRandomPhilox(rng.device, Float32, seed, MPSMatrixRandomNormalDistributionDescriptor(0, 1)) + return rng +end + +Random.seed!(rng::RNG) = Random.seed!(rng, make_seed()) + +const GLOBAL_RNGs = Dict{MTLDevice,MPS.RNG}() +@autoreleasepool function default_rng() + dev = current_device() + get!(GLOBAL_RNGs, dev) do + RNG(dev) + end +end + +const UniformTypes = [Float32,UInt8,Int8,UInt16,Int16,UInt32,Int32,UInt64,Int64] +const UniformType = Union{[Type{T} for T in UniformTypes]...} +const UniformArray = MtlArray{<:Union{Float32,UInt8,Int8,UInt16,Int16,UInt32,Int32,UInt64,Int64}} +@autoreleasepool function Random.rand!(rng::RNG, A::MtlArray{T}) where {T<:Union{UInt8,Int8,UInt16,Int16,UInt32,Int32,UInt64,Int64}} + isempty(A) && return A + _mpsmat_rand!(rng.uniformInteger, A, UInt32) + return A +end + +@autoreleasepool function Random.rand!(rng::RNG, A::MtlArray{Float32}) + isempty(A) && return A + _mpsmat_rand!(rng.uniformFloat32, A, Float32) + return A +end + +const NormalType = Type{Float32} +const NormalArray = MtlArray{<:Float32} +@autoreleasepool function Random.randn!(rng::RNG, A::MtlArray{Float32}) + isempty(A) && return A + _mpsmat_rand!(rng.normalFloat32, A, Float32) + return A +end + +# CPU arrays +function Random.rand!(rng::RNG, A::AbstractArray{T,N}) where {T <: Union{UniformTypes...}, N} + isempty(A) && return A + B = MtlArray{T,N,Shared}(undef, size(A)) + rand!(rng, B) + copyto!(A, unsafe_wrap(Array{T},B)) + return A +end +function Random.randn!(rng::RNG, A::AbstractArray{T,N}) where {T <: Float32, N} + isempty(A) && return A + B = MtlArray{T,N,Shared}(undef, size(A)) + randn!(rng, B) + copyto!(A, unsafe_wrap(Array{T},B)) + return A +end + +# Out of place +Random.rand(rng::RNG, T::UniformType, dims::Dims; storage=DefaultStorageMode) = + Random.rand!(rng, MtlArray{T,length(dims),storage}(undef, dims...)) +Random.randn(rng::RNG, T::NormalType, dims::Dims; 
storage=DefaultStorageMode) = + Random.randn!(rng, MtlArray{T,length(dims),storage}(undef, dims...)) + +# support all dimension specifications +Random.rand(rng::RNG, T::UniformType, dim1::Integer, dims::Integer...; storage=DefaultStorageMode) = + Random.rand!(rng, MtlArray{T,length(dims) + 1,storage}(undef, dim1, dims...)) +Random.randn(rng::RNG, T::NormalType, dim1::Integer, dims::Integer...; storage=DefaultStorageMode) = + Random.randn!(rng, MtlArray{T,length(dims) + 1,storage}(undef, dim1, dims...)) + +# untyped out-of-place +Random.rand(rng::RNG, dim1::Integer, dims::Integer...; storage=DefaultStorageMode) = + Random.rand!(rng, MtlArray{Float32,length(dims) + 1,storage}(undef, dim1, dims...)) +Random.randn(rng::RNG, dim1::Integer, dims::Integer...; storage=DefaultStorageMode) = + Random.randn!(rng, MtlArray{Float32,length(dims) + 1,storage}(undef, dim1, dims...)) + +# scalars +Random.rand(rng::RNG, T::UniformType=Float32; storage=Shared) = rand(rng, T, 1; storage)[] +Random.randn(rng::RNG, T::NormalType=Float32; storage=Shared) = randn(rng, T, 1; storage)[] diff --git a/src/random.jl b/src/random.jl index 81cc48c00..bc6458253 100644 --- a/src/random.jl +++ b/src/random.jl @@ -1,24 +1,69 @@ using Random +using ..MPS: MPSVector, _mpsmat_rand!, MPSMatrixRandomUniformDistributionDescriptor, + MPSMatrixRandomNormalDistributionDescriptor gpuarrays_rng() = GPUArrays.default_rng(MtlArray) +mpsrand_rng() = MPS.default_rng() # GPUArrays in-place Random.rand!(A::MtlArray) = Random.rand!(gpuarrays_rng(), A) Random.randn!(A::MtlArray) = Random.randn!(gpuarrays_rng(), A) +@inline function can_use_mpsrandom(A::MtlArray{T}) where {T} + return A.offset * sizeof(T) % 4 == 0 && sizeof(A) % 4 == 0 +end + +# Use MPS random functionality where possible +function Random.rand!(A::MPS.UniformArray) + if can_use_mpsrandom(A) + @inline Random.rand!(mpsrand_rng(), A) + else + @inline Random.rand!(gpuarrays_rng(), A) + end + return A +end +function Random.randn!(A::MPS.NormalArray) + if 
can_use_mpsrandom(A) + @inline Random.randn!(mpsrand_rng(), A) + else + @inline Random.randn!(gpuarrays_rng(), A) + end + return A +end + # GPUArrays out-of-place -rand(T::Type, dims::Dims; storage=DefaultStorageMode) = Random.rand!(MtlArray{T,length(dims),storage}(undef, dims...)) -randn(T::Type, dims::Dims; storage=DefaultStorageMode, kwargs...) = Random.randn!(MtlArray{T,length(dims),storage}(undef, dims...); kwargs...) +rand(T::MPS.UniformType, dims::Dims; storage=DefaultStorageMode) = + Random.rand!(mpsrand_rng(), MtlArray{T,length(dims),storage}(undef, dims...)) +randn(T::MPS.NormalType, dims::Dims; storage=DefaultStorageMode) = + Random.randn!(mpsrand_rng(), MtlArray{T,length(dims),storage}(undef, dims...)) +rand(T::Type, dims::Dims; storage=DefaultStorageMode) = + Random.rand!(gpuarrays_rng(), MtlArray{T,length(dims),storage}(undef, dims...)) +randn(T::Type, dims::Dims; storage=DefaultStorageMode) = + Random.randn!(gpuarrays_rng(), MtlArray{T,length(dims),storage}(undef, dims...)) # support all dimension specifications +rand(T::MPS.UniformType, dim1::Integer, dims::Integer...; storage=DefaultStorageMode) = + Random.rand!(mpsrand_rng(), MtlArray{T,length(dims) + 1,storage}(undef, dim1, dims...)) +randn(T::MPS.NormalType, dim1::Integer, dims::Integer...; storage=DefaultStorageMode) = + Random.randn!(mpsrand_rng(), MtlArray{T,length(dims) + 1,storage}(undef, dim1, dims...)) + rand(T::Type, dim1::Integer, dims::Integer...; storage=DefaultStorageMode) = - Random.rand!(MtlArray{T,length(dims)+1,storage}(undef, dim1, dims...)) -randn(T::Type, dim1::Integer, dims::Integer...; storage=DefaultStorageMode, kwargs...) = - Random.randn!(MtlArray{T,length(dims)+1,storage}(undef, dim1, dims...); kwargs...) 
+ Random.rand!(gpuarrays_rng(), MtlArray{T,length(dims) + 1,storage}(undef, dim1, dims...)) +randn(T::Type, dim1::Integer, dims::Integer...; storage=DefaultStorageMode) = + Random.randn!(gpuarrays_rng(), MtlArray{T,length(dims) + 1,storage}(undef, dim1, dims...)) # untyped out-of-place -rand(dim1::Integer, dims::Integer...; storage=DefaultStorageMode) = Random.rand!(MtlArray{Float32,length(dims)+1,storage}(undef, dim1, dims...)) -randn(dim1::Integer, dims::Integer...; storage=DefaultStorageMode, kwargs...) = Random.randn!(MtlArray{Float32,length(dims)+1,storage}(undef, dim1, dims...); kwargs...) +rand(dim1::Integer, dims::Integer...; storage=DefaultStorageMode) = + Random.rand!(mpsrand_rng(), MtlArray{Float32,length(dims) + 1,storage}(undef, dim1, dims...)) +randn(dim1::Integer, dims::Integer...; storage=DefaultStorageMode) = + Random.randn!(mpsrand_rng(), MtlArray{Float32,length(dims) + 1,storage}(undef, dim1, dims...)) + +# scalars +rand(T::Type=Float32; storage=Shared) = rand(T, 1; storage)[] +randn(T::Type=Float32; storage=Shared) = randn(T, 1; storage)[] # seeding -seed!(seed=Base.rand(UInt64)) = Random.seed!(gpuarrays_rng(), seed) +function seed!(seed=Base.rand(UInt64)) + Random.seed!(gpuarrays_rng(), seed) + Random.seed!(mpsrand_rng(), seed) +end diff --git a/test/random.jl b/test/random.jl index 89c771bca..a8f3c3186 100644 --- a/test/random.jl +++ b/test/random.jl @@ -1,39 +1,251 @@ using Random -@testset "rand" begin - -# in-place -for (f,T) in ((rand!,Float16), - (rand!,Float32), - (randn!,Float16), - (randn!,Float32)), - d in (2, (2,2), (2,2,2), 3, (3,3), (3,3,3)) - A = MtlArray{T}(undef, d) - fill!(A, T(0)) - f(A) - @test !iszero(collect(A)) -end - -# out-of-place, with implicit type -for (f,T) in ((Metal.rand,Float32), (Metal.randn,Float32)), - args in ((2,), (2, 2), (3,), (3, 3)) - A = f(args...) 
- @test eltype(A) == T -end - -# out-of-place, with type specified -for (f,T) in ((Metal.rand,Float32), (Metal.randn,Float32), - (rand,Float32), (randn,Float32)), - args in ((T, 2), (T, 2, 2), (T, (2, 2)), (T, 3), (T, 3, 3), (T, (3, 3))) - A = f(args...) - @test eltype(A) == T -end - -## seeding -Metal.seed!(1) -a = Metal.rand(Int32, 1) -Metal.seed!(1) -b = Metal.rand(Int32, 1) -@test iszero(collect(a) - collect(b)) +const RAND_TYPES = [Float16, Float32, Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, + UInt64] +const RANDN_TYPES = [Float16, Float32] +const INPLACE_TUPLES = [[(rand!, T) for T in RAND_TYPES]; + [(randn!, T) for T in RANDN_TYPES]] +const OOPLACE_TUPLES = [[(Metal.rand, rand, T) for T in RAND_TYPES]; + [(Metal.randn, rand, T) for T in RANDN_TYPES]] +@testset "random" begin + # in-place + @testset "in-place" begin + rng = Metal.MPS.RNG() + + # Seed the default generators to work around value of 0 being + # randomly generated in the size 1 Int8 Array test in 1.11 + Metal.seed!(123) + + @testset "$f with $T" for (f, T) in INPLACE_TUPLES + @testset "$d" for d in (1, 3, (3, 3), (3, 3, 3), 16, (16, 16), (16, 16, 16), (1000,), (1000,1000)) + A = MtlArray{T}(undef, d) + + # default_rng + fill!(A, T(0)) + f(A) + @test !iszero(collect(A)) + + # specified MPS rng + if T != Float16 + fill!(A, T(0)) + f(rng, A) + @test !iszero(collect(A)) + end + end + + @testset "0" begin + A = MtlArray{T}(undef, 0) + + # default_rng + f(A) + @test A isa MtlArray{T,1} + @test Array(A) == fill(1, 0) + + # specified MPS rng + if T != Float16 + fill!(A, T(0)) + f(rng, A) + @test Array(A) == fill(1, 0) + end + end + end + end + + # in-place contiguous views + @testset "in-place for views" begin + @testset "$f with $T" for (f, T) in INPLACE_TUPLES + alen = 100 + A = MtlArray{T}(undef, alen) + function test_view!(X::MtlArray{T}, idx) where {T} + fill!(X, T(0)) + view_X = @view X[idx] + f(view_X) + cpuX = collect(X) + not_zero_in_view = !iszero(cpuX[idx]) + rest_of_array_untouched = 
iszero(cpuX[1:alen .∉ Ref(idx)]) + return not_zero_in_view, rest_of_array_untouched + end + + # Test when view offset is 0 and buffer size not multiple of 4 + @testset "Off == 0, buf % 4 != 0" begin + not_zero_in_view, rest_of_array_untouched = test_view!(A, 1:51) + @test not_zero_in_view + @test rest_of_array_untouched + end + + # Test when view offset is 0 and buffer size is multiple of 16 + @testset "Off == 0, buf % 16 == 0" begin + not_zero_in_view, rest_of_array_untouched = test_view!(A, 1:32) + @test not_zero_in_view + @test rest_of_array_untouched + end + + # Test when view offset is 0 and buffer size is multiple of 4 + @testset "Off == 0, buf % 4 == 0" begin + not_zero_in_view, rest_of_array_untouched = test_view!(A, 1:36) + @test not_zero_in_view + @test rest_of_array_untouched + end + + # Test when view offset is not 0 nor multiple of 4 and buffer size not multiple of 16 + @testset "Off != 0, buf % 4 != 0" begin + not_zero_in_view, rest_of_array_untouched = test_view!(A, 3:51) + @test not_zero_in_view + @test rest_of_array_untouched + end + + # Test when view offset is multiple of 4 and buffer size not multiple of 4 + @testset "Off % 4 == 0, buf % 4 != 0" begin + not_zero_in_view, rest_of_array_untouched = test_view!(A, 17:51) + @test not_zero_in_view + @test rest_of_array_untouched + end + + # Test when view offset is multiple of 4 and buffer size multiple of 16 + @testset "Off % 4 == 0, buf % 16 == 0" begin + not_zero_in_view, rest_of_array_untouched = test_view!(A, 9:40) + @test not_zero_in_view + @test rest_of_array_untouched + end + + # Test when view offset is multiple of 4 and buffer size multiple of 4 + @testset "Off % 16 == 0, buf % 4 == 0" begin + not_zero_in_view, rest_of_array_untouched = test_view!(A, 9:32) + @test not_zero_in_view + @test rest_of_array_untouched + end + end + + # Test when views try to use rand!(rng, args..) 
+ @testset "MPS.RNG with views" begin + rng = Metal.MPS.RNG() + @testset "$f with $T" for (f, T) in ((randn!, Float32),(rand!, Int64),(rand!, Float32), (rand!, UInt16), (rand!,Int8)) + A = MtlArray{T}(undef, 100) + + ## Offset > 0 + fill!(A, T(0)) + idx = 4:51 + view_A = @view A[idx] + + # Errors in Julia before crashing whole process + if view_A.offset * sizeof(T) % 4 != 0 + @test_throws "Destination buffer offset ($(view_A.offset*sizeof(T)))" f(rng, view_A) + else + f(rng, view_A) + + cpuA = collect(A) + @test !iszero(cpuA[idx]) + + @test iszero(cpuA[1:100 .∉ Ref(idx)]) broken=(sizeof(view_A) % 4 != 0) + end + + ## Offset == 0 + fill!(A, T(0)) + idx = 1:51 + view_A = @view A[idx] + f(rng, view_A) + + cpuA = collect(A) + @test !iszero(cpuA[idx]) + + # XXX: Why are the 8-bit and 16-bit type tests not broken? + @test iszero(cpuA[1:100 .∉ Ref(idx)])# broken=(sizeof(view_A) % 4 != 0) + end + end + end + # out-of-place + @testset "out-of-place" begin + @testset "$fr with implicit type" for (fm, fr, T) in + ((Metal.rand, rand, Float32), (Metal.randn, rand, Float32)) + rng = Metal.MPS.RNG() + @testset "args" for args in ((0,), (1,), (3,), (3, 3), (16,), (16, 16), (1000,), (1000,1000)) + # default_rng + A = fm(args...) + @test eltype(A) == T + + # specified MPS rng + B = fr(rng, args...) + @test eltype(B) == T + end + + @testset "scalar" begin + a = fm() + @test typeof(a) == T + b = fr(rng) + @test typeof(b) == T + end + end + + # out-of-place, with type specified + @testset "$fr with $T" for (fm, fr, T) in OOPLACE_TUPLES + rng = Metal.MPS.RNG() + @testset "$args" for args in ((T, 0), + (T, 1), + (T, 3), + (T, 3, 3), + (T, (3, 3)), + (T, 16), + (T, 16, 16), + (T, (16, 16)), + (T, 1000), + (T, 1000, 1000),) + # default_rng + A = fm(args...) + @test eltype(A) == T + + # specified MPS rng + if T != Float16 + B = fr(rng, args...) 
+ @test eltype(B) == T + end + end + + @testset "scalar" begin + a = fm(T) + @test typeof(a) == T + b = fr(rng, T) + @test typeof(b) == T + end + end + end + + ## CPU Arrays with MPS rng + @testset "CPU Arrays" begin + MPS_TUPLES = filter(INPLACE_TUPLES) do tup + tup[2] != Float16 + end + rng = Metal.MPS.RNG() + @testset "$f with $T" for (f, T) in MPS_TUPLES + + @testset "$d" for d in (1, 3, (3, 3), (3, 3, 3), 16, (16, 16), (16, 16, 16), (1000,), (1000,1000)) + A = zeros(T, d) + f(rng, A) + @test !iszero(collect(A)) + end + + @testset "0" begin + A = rand(T, 0) + b = rand(T) + fill!(A, b) + @test A isa Array{T,1} + @test Array(A) == fill(b, 0) + end + end + end + + ## seeding + @testset "Seeding $L" for (f,T,L) in [(Metal.rand,UInt32,"Uniform Integers MPS"), + (Metal.rand,Float32,"Uniform Float32 MPS"), + (Metal.randn,Float32,"Normal Float32 MPS"), + (Metal.randn,Float16,"Float16 GPUArrays")] + @testset "$d" for d in (1, 3, (3, 3, 3), 16, (16, 16), (16, 16, 16), (1000,), (1000,1000)) + Metal.seed!(1) + a = f(T, d) + Metal.seed!(1) + b = f(T, d) + # TODO: Remove once https://github.com/JuliaGPU/Metal.jl/issues/331 is fixed + @test iszero(collect(a) - collect(b)) broken = (T == Float16 && d == (1000,1000)) + end + end end # testset From e129b39c60b9ef66378dd0562ac49aab524cfe25 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Tue, 21 May 2024 13:27:03 -0300 Subject: [PATCH 03/15] Comply with Metal documentation by preventing copies between buffer sizes that are not divisible by 4 --- docs/src/usage/array.md | 2 +- lib/mps/matrixrandom.jl | 17 +++++----- lib/mps/random.jl | 4 +-- src/random.jl | 44 +++++++++++++------------- test/random.jl | 69 +++++++++++++++++++++++------------------ 5 files changed, 74 insertions(+), 62 deletions(-) diff --git a/docs/src/usage/array.md b/docs/src/usage/array.md index 0cf57f47a..dc1a44e10 100644 --- a/docs/src/usage/array.md +++ b/docs/src/usage/array.md @@ -149,4 +149,4 @@ 
julia> a = Random.rand!(GPUArrays.default_rng(MtlArray), a) `MPSMatrixRandom` functionality requires Metal.jl > v1.1 !!! warning - Do not use `Random.rand!(::MPS.RNG, args...)` or `Random.randn!(::MPS.RNG, args...)` on views as you will most likely overwrite values outside of the view due to limitations in random number generation in the Metal Performance Shaders framework. + `Random.rand!(::MPS.RNG, args...)` andc `Random.randn!(::MPS.RNG, args...)` have a framework limitation that requires the byte offset and byte size of the destination array to be a multiple of 4. diff --git a/lib/mps/matrixrandom.jl b/lib/mps/matrixrandom.jl index 0d5eb8741..6c624021b 100644 --- a/lib/mps/matrixrandom.jl +++ b/lib/mps/matrixrandom.jl @@ -114,29 +114,32 @@ synchronizeStateOnCommandBuffer(kern::MPSMatrixRandomMTGP32, cmdbuf::MTLCommandB @objc [obj::id{MPSMatrixRandomMTGP32} synchronizeStateOnCommandBuffer:cmdbuf::id{MTLCommandBuffer}]::Nothing - @inline function _mpsmat_rand!(randkern::MPSMatrixRandom, dest::MtlArray{T}, ::Type{T2}; queue::MTLCommandQueue = global_queue(current_device()), async::Bool=false) where {T,T2} byteoffset = dest.offset * sizeof(T) - (byteoffset % 4 == 0) || error(lazy"Destination buffer offset ($(byteoffset)) must be a multiple of 4.") + bytesize = sizeof(dest) - srcbytes = sizeof(dest) + # Even though `append_copy`` seems to work with any size or offset values, the documentation at + # https://developer.apple.com/documentation/metal/mtlblitcommandencoder/1400767-copyfrombuffer?language=objc + # mentions that both must be multiples of 4 bytes in MacOS so error when they are not + (bytesize % 4 == 0) || error(lazy"Destination buffer bytesize ($(bytesize)) must be a multiple of 4.") + (byteoffset % 4 == 0) || error(lazy"Destination buffer offset ($(byteoffset)) must be a multiple of 4.") - cmdbuf = if srcbytes % 16 == 0 && dest.offset == 0 + cmdbuf = if bytesize % 16 == 0 && dest.offset == 0 MTLCommandBuffer(queue) do cmdbuf - vecDesc = 
MPSVectorDescriptor(srcbytes ÷ sizeof(T2), T2) + vecDesc = MPSVectorDescriptor(bytesize ÷ sizeof(T2), T2) mpsdest = MPSVector(dest, vecDesc) encode!(cmdbuf, randkern, mpsdest) end else MTLCommandBuffer(queue) do cmdbuf - len = UInt(ceil(srcbytes / sizeof(T2)) * 4) + len = UInt(ceil(bytesize / sizeof(T2)) * 4) vecDesc = MPSVectorDescriptor(len, T2) tempVec = MPSTemporaryVector(cmdbuf, vecDesc) encode!(cmdbuf, randkern, tempVec) MTLBlitCommandEncoder(cmdbuf) do enc - MTL.append_copy!(enc, dest.data[], byteoffset, tempVec.data, tempVec.offset, srcbytes) + MTL.append_copy!(enc, dest.data[], byteoffset, tempVec.data, tempVec.offset, bytesize) end end end diff --git a/lib/mps/random.jl b/lib/mps/random.jl index e5b4c987d..7287a1471 100644 --- a/lib/mps/random.jl +++ b/lib/mps/random.jl @@ -105,5 +105,5 @@ Random.randn(rng::RNG, dim1::Integer, dims::Integer...; storage=DefaultStorageMo Random.randn!(rng, MtlArray{Float32,length(dims) + 1,storage}(undef, dim1, dims...)) # scalars -Random.rand(rng::RNG, T::UniformType=Float32; storage=Shared) = rand(rng, T, 1; storage)[] -Random.randn(rng::RNG, T::NormalType=Float32; storage=Shared) = randn(rng, T, 1; storage)[] +Random.rand(rng::RNG, T::UniformType=Float32; storage=Shared) = rand(rng, T, 4; storage)[1] +Random.randn(rng::RNG, T::NormalType=Float32; storage=Shared) = randn(rng, T, 4; storage)[1] diff --git a/src/random.jl b/src/random.jl index bc6458253..25783dcc7 100644 --- a/src/random.jl +++ b/src/random.jl @@ -15,37 +15,37 @@ end # Use MPS random functionality where possible function Random.rand!(A::MPS.UniformArray) - if can_use_mpsrandom(A) - @inline Random.rand!(mpsrand_rng(), A) - else - @inline Random.rand!(gpuarrays_rng(), A) - end - return A + rng = can_use_mpsrandom(A) ? 
mpsrand_rng() : gpuarrays_rng() + return Random.rand!(rng, A) end function Random.randn!(A::MPS.NormalArray) - if can_use_mpsrandom(A) - @inline Random.randn!(mpsrand_rng(), A) - else - @inline Random.randn!(gpuarrays_rng(), A) - end - return A + rng = can_use_mpsrandom(A) ? mpsrand_rng() : gpuarrays_rng() + return Random.randn!(rng, A) end # GPUArrays out-of-place -rand(T::MPS.UniformType, dims::Dims; storage=DefaultStorageMode) = - Random.rand!(mpsrand_rng(), MtlArray{T,length(dims),storage}(undef, dims...)) -randn(T::MPS.NormalType, dims::Dims; storage=DefaultStorageMode) = - Random.randn!(mpsrand_rng(), MtlArray{T,length(dims),storage}(undef, dims...)) +function rand(T::MPS.UniformType, dims::Dims; storage=DefaultStorageMode) + rng = prod(dims) * sizeof(T) % 4 == 0 ? mpsrand_rng() : gpuarrays_rng() + return Random.rand!(rng, MtlArray{T,length(dims),storage}(undef, dims...)) +end +function randn(T::MPS.NormalType, dims::Dims; storage=DefaultStorageMode) + rng = prod(dims) * sizeof(T) % 4 == 0 ? mpsrand_rng() : gpuarrays_rng() + return Random.randn!(rng, MtlArray{T,length(dims),storage}(undef, dims...)) +end rand(T::Type, dims::Dims; storage=DefaultStorageMode) = Random.rand!(gpuarrays_rng(), MtlArray{T,length(dims),storage}(undef, dims...)) randn(T::Type, dims::Dims; storage=DefaultStorageMode) = Random.randn!(gpuarrays_rng(), MtlArray{T,length(dims),storage}(undef, dims...)) # support all dimension specifications -rand(T::MPS.UniformType, dim1::Integer, dims::Integer...; storage=DefaultStorageMode) = - Random.rand!(mpsrand_rng(), MtlArray{T,length(dims) + 1,storage}(undef, dim1, dims...)) -randn(T::MPS.NormalType, dim1::Integer, dims::Integer...; storage=DefaultStorageMode) = - Random.randn!(mpsrand_rng(), MtlArray{T,length(dims) + 1,storage}(undef, dim1, dims...)) +function rand(T::MPS.UniformType, dim1::Integer, dims::Integer...; storage=DefaultStorageMode) + rng = (dim1 * prod(dims) * sizeof(T)) % 4 == 0 ? 
mpsrand_rng() : gpuarrays_rng() + return Random.rand!(rng, MtlArray{T,length(dims) + 1,storage}(undef, dim1, dims...)) +end +function randn(T::MPS.NormalType, dim1::Integer, dims::Integer...; storage=DefaultStorageMode) + rng = (dim1 * prod(dims) * sizeof(T)) % 4 == 0 ? mpsrand_rng() : gpuarrays_rng() + return Random.randn!(rng, MtlArray{T,length(dims) + 1,storage}(undef, dim1, dims...)) +end rand(T::Type, dim1::Integer, dims::Integer...; storage=DefaultStorageMode) = Random.rand!(gpuarrays_rng(), MtlArray{T,length(dims) + 1,storage}(undef, dim1, dims...)) @@ -59,8 +59,8 @@ randn(dim1::Integer, dims::Integer...; storage=DefaultStorageMode) = Random.randn!(mpsrand_rng(), MtlArray{Float32,length(dims) + 1,storage}(undef, dim1, dims...)) # scalars -rand(T::Type=Float32; storage=Shared) = rand(T, 1; storage)[] -randn(T::Type=Float32; storage=Shared) = randn(T, 1; storage)[] +rand(T::Type=Float32; storage=Shared) = rand(T, 4; storage)[1] +randn(T::Type=Float32; storage=Shared) = randn(T, 4; storage)[1] # seeding function seed!(seed=Base.rand(UInt64)) diff --git a/test/random.jl b/test/random.jl index a8f3c3186..f0e94edef 100644 --- a/test/random.jl +++ b/test/random.jl @@ -1,4 +1,5 @@ using Random +using Metal: can_use_mpsrandom const RAND_TYPES = [Float16, Float32, Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64] @@ -29,8 +30,12 @@ const OOPLACE_TUPLES = [[(Metal.rand, rand, T) for T in RAND_TYPES]; # specified MPS rng if T != Float16 fill!(A, T(0)) - f(rng, A) - @test !iszero(collect(A)) + if can_use_mpsrandom(A) + f(rng, A) + @test !iszero(collect(A)) + else + @test_throws "Destination buffer" f(rng, A) + end end end @@ -45,8 +50,12 @@ const OOPLACE_TUPLES = [[(Metal.rand, rand, T) for T in RAND_TYPES]; # specified MPS rng if T != Float16 fill!(A, T(0)) - f(rng, A) - @test Array(A) == fill(1, 0) + if can_use_mpsrandom(A) + f(rng, A) + @test Array(A) == fill(1, 0) + else + @test_throws "Destination buffer" f(rng, A) + end end end end @@ -125,32 +134,33 @@ 
const OOPLACE_TUPLES = [[(Metal.rand, rand, T) for T in RAND_TYPES]; ## Offset > 0 fill!(A, T(0)) - idx = 4:51 + idx = 4:50 view_A = @view A[idx] # Errors in Julia before crashing whole process - if view_A.offset * sizeof(T) % 4 != 0 - @test_throws "Destination buffer offset ($(view_A.offset*sizeof(T)))" f(rng, view_A) - else + if can_use_mpsrandom(view_A) f(rng, view_A) cpuA = collect(A) @test !iszero(cpuA[idx]) - @test iszero(cpuA[1:100 .∉ Ref(idx)]) broken=(sizeof(view_A) % 4 != 0) + else + @test_throws "Destination buffer" f(rng, view_A) end ## Offset == 0 fill!(A, T(0)) idx = 1:51 view_A = @view A[idx] - f(rng, view_A) - - cpuA = collect(A) - @test !iszero(cpuA[idx]) + if can_use_mpsrandom(view_A) + f(rng, view_A) - # XXX: Why are the 8-bit and 16-bit type tests not broken? - @test iszero(cpuA[1:100 .∉ Ref(idx)])# broken=(sizeof(view_A) % 4 != 0) + cpuA = collect(A) + @test !iszero(cpuA[idx]) + @test iszero(cpuA[1:100 .∉ Ref(idx)]) + else + @test_throws "Destination buffer" f(rng, view_A) + end end end end @@ -196,8 +206,12 @@ const OOPLACE_TUPLES = [[(Metal.rand, rand, T) for T in RAND_TYPES]; # specified MPS rng if T != Float16 - B = fr(rng, args...) - @test eltype(B) == T + if length(zeros(args...)) * sizeof(T) % 4 == 0 + B = fr(rng, args...) + @test eltype(B) == T + else + @test_throws "Destination buffer" fr(rng, args...) 
+ end end end @@ -212,24 +226,19 @@ const OOPLACE_TUPLES = [[(Metal.rand, rand, T) for T in RAND_TYPES]; ## CPU Arrays with MPS rng @testset "CPU Arrays" begin - MPS_TUPLES = filter(INPLACE_TUPLES) do tup + mps_tuples = filter(INPLACE_TUPLES) do tup tup[2] != Float16 end rng = Metal.MPS.RNG() - @testset "$f with $T" for (f, T) in MPS_TUPLES - + @testset "$f with $T" for (f, T) in mps_tuples @testset "$d" for d in (1, 3, (3, 3), (3, 3, 3), 16, (16, 16), (16, 16, 16), (1000,), (1000,1000)) A = zeros(T, d) - f(rng, A) - @test !iszero(collect(A)) - end - - @testset "0" begin - A = rand(T, 0) - b = rand(T) - fill!(A, b) - @test A isa Array{T,1} - @test Array(A) == fill(b, 0) + if (prod(d) * sizeof(T)) % 4 == 0 + f(rng, A) + @test !iszero(collect(A)) + else + @test_throws "Destination buffer" f(rng, A) + end end end end From 29b56a4a726ea6c433ca0896a2b17c9a5ee4ddee Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Fri, 24 May 2024 12:55:18 -0300 Subject: [PATCH 04/15] Address review comments --- docs/src/usage/array.md | 2 +- lib/mps/matrixrandom.jl | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/docs/src/usage/array.md b/docs/src/usage/array.md index dc1a44e10..40b53966e 100644 --- a/docs/src/usage/array.md +++ b/docs/src/usage/array.md @@ -149,4 +149,4 @@ julia> a = Random.rand!(GPUArrays.default_rng(MtlArray), a) `MPSMatrixRandom` functionality requires Metal.jl > v1.1 !!! warning - `Random.rand!(::MPS.RNG, args...)` andc `Random.randn!(::MPS.RNG, args...)` have a framework limitation that requires the byte offset and byte size of the destination array to be a multiple of 4. + `Random.rand!(::MPS.RNG, args...)` and `Random.randn!(::MPS.RNG, args...)` have a framework limitation that requires the byte offset and byte size of the destination array to be a multiple of 4. 
diff --git a/lib/mps/matrixrandom.jl b/lib/mps/matrixrandom.jl index 6c624021b..10366c26b 100644 --- a/lib/mps/matrixrandom.jl +++ b/lib/mps/matrixrandom.jl @@ -24,7 +24,7 @@ end function MPSMatrixRandomDefaultDistributionDescriptor() desc = @objc [MPSMatrixRandomDistributionDescriptor defaultDistributionDescriptor]::id{MPSMatrixRandomDistributionDescriptor} obj = MPSMatrixRandomDistributionDescriptor(desc) - # XXX: who releases this object? + finalizer(release, obj) return obj end @@ -35,7 +35,6 @@ function MPSMatrixRandomNormalDistributionDescriptor(mean, standardDeviation) desc = @objc [MPSMatrixRandomDistributionDescriptor normalDistributionDescriptorWithMean:mean::Float32 standardDeviation:standardDeviation::Float32]::id{MPSMatrixRandomDistributionDescriptor} obj = MPSMatrixRandomDistributionDescriptor(desc) - # XXX: who releases this object? return obj end @@ -45,7 +44,6 @@ function MPSMatrixRandomNormalDistributionDescriptor(mean, standardDeviation, mi minimum:minimum::Float32 maximum:maximum::Float32]::id{MPSMatrixRandomDistributionDescriptor} obj = MPSMatrixRandomDistributionDescriptor(desc) - # XXX: who releases this object? return obj end @@ -53,7 +51,6 @@ function MPSMatrixRandomUniformDistributionDescriptor(minimum, maximum) desc = @objc [MPSMatrixRandomDistributionDescriptor uniformDistributionDescriptorWithMinimum:minimum::Float32 maximum:maximum::Float32]::id{MPSMatrixRandomDistributionDescriptor} obj = MPSMatrixRandomDistributionDescriptor(desc) - # XXX: who releases this object? 
return obj end From 423569784eaf65ceb385849b7fadc91314c20689 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Fri, 24 May 2024 13:13:28 -0300 Subject: [PATCH 05/15] Update compat note --- docs/src/usage/array.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/usage/array.md b/docs/src/usage/array.md index 40b53966e..74e0e4c34 100644 --- a/docs/src/usage/array.md +++ b/docs/src/usage/array.md @@ -146,7 +146,7 @@ julia> a = Random.rand!(GPUArrays.default_rng(MtlArray), a) ``` !!! note - `MPSMatrixRandom` functionality requires Metal.jl > v1.1 + `MPSMatrixRandom` functionality requires Metal.jl >= v1.2 !!! warning `Random.rand!(::MPS.RNG, args...)` and `Random.randn!(::MPS.RNG, args...)` have a framework limitation that requires the byte offset and byte size of the destination array to be a multiple of 4. From 2595914143215c4faac65dd7953737879110ca78 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Thu, 30 May 2024 10:34:12 -0300 Subject: [PATCH 06/15] Remove reliance of gpu random functionality in copy tests. 
--- test/mps/copy.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/mps/copy.jl b/test/mps/copy.jl index ac20f533e..0a7bef801 100644 --- a/test/mps/copy.jl +++ b/test/mps/copy.jl @@ -33,7 +33,8 @@ end Ts = Ts[.!(Ts .<: IGNORE_UNION)] @testset "$T" for T in Ts for dim in ((16,16), (10,500), (500,10), (256,512)) - srcMat = Metal.rand(T, dim) + + srcMat = MtlArray(rand(T, dim)) dstMat = copytest(srcMat, false, false) @test dstMat == srcMat From c4771382d26cf675c93b167269e05563a201b8f4 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Thu, 6 Jun 2024 15:39:23 -0300 Subject: [PATCH 07/15] Fix segmentation fault --- lib/mps/matrixrandom.jl | 1 - test/random.jl | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/mps/matrixrandom.jl b/lib/mps/matrixrandom.jl index 10366c26b..da41a705f 100644 --- a/lib/mps/matrixrandom.jl +++ b/lib/mps/matrixrandom.jl @@ -24,7 +24,6 @@ end function MPSMatrixRandomDefaultDistributionDescriptor() desc = @objc [MPSMatrixRandomDistributionDescriptor defaultDistributionDescriptor]::id{MPSMatrixRandomDistributionDescriptor} obj = MPSMatrixRandomDistributionDescriptor(desc) - finalizer(release, obj) return obj end diff --git a/test/random.jl b/test/random.jl index f0e94edef..40aab582a 100644 --- a/test/random.jl +++ b/test/random.jl @@ -1,4 +1,5 @@ using Random +using Metal using Metal: can_use_mpsrandom const RAND_TYPES = [Float16, Float32, Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, From 65067a0939dfe560451b61ab2e807e3b83c684da Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Fri, 7 Jun 2024 13:11:33 -0300 Subject: [PATCH 08/15] Fix doctests --- docs/src/usage/array.md | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/docs/src/usage/array.md b/docs/src/usage/array.md index 74e0e4c34..2b13322b4 100644 --- a/docs/src/usage/array.md +++ 
b/docs/src/usage/array.md @@ -3,11 +3,12 @@ ```@meta DocTestSetup = quote using Metal + using GPUArrays import Random - Random.seed!(0) + Random.seed!(1) - Metal.seed!(0) + Metal.seed!(1) end ``` @@ -119,13 +120,13 @@ Base's convenience functions for generating random numbers are available in Meta ```jldoctest julia> Metal.rand(2) 2-element MtlVector{Float32, Private}: - 0.39904642 - 0.8805201 + 0.89025915 + 0.8946847 julia> Metal.randn(Float32, 2, 1) 2×1 MtlMatrix{Float32, Private}: - -0.18797699 - -0.006818078 + 1.2279074 + 1.2518331 ``` Behind the scenes, these random numbers come from two different generators: one backed by @@ -138,11 +139,11 @@ julia> using Random, GPUArrays julia> a = Random.rand(MPS.default_rng(), Float32, 1) 1-element MtlVector{Float32, Private}: - 0.39904642 + 0.89025915 julia> a = Random.rand!(GPUArrays.default_rng(MtlArray), a) 1-element MtlVector{Float32, Private}: - 0.13394515 + 0.0705002 ``` !!! note From c952493677a81d82729f6e81437394902a8a19fc Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Sat, 13 Jul 2024 18:18:37 -0300 Subject: [PATCH 09/15] Docstring --- docs/src/usage/array.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/usage/array.md b/docs/src/usage/array.md index 2b13322b4..27c865c12 100644 --- a/docs/src/usage/array.md +++ b/docs/src/usage/array.md @@ -147,7 +147,7 @@ julia> a = Random.rand!(GPUArrays.default_rng(MtlArray), a) ``` !!! note - `MPSMatrixRandom` functionality requires Metal.jl >= v1.2 + `MPSMatrixRandom` functionality requires Metal.jl >= v1.3 !!! warning `Random.rand!(::MPS.RNG, args...)` and `Random.randn!(::MPS.RNG, args...)` have a framework limitation that requires the byte offset and byte size of the destination array to be a multiple of 4. 
From 47cbf7e7ea28e4a7d4d61ddfb055f09ffa88473f Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Tue, 16 Jul 2024 16:00:33 -0300 Subject: [PATCH 10/15] `current_device()` -> `device()` --- lib/mps/matrixrandom.jl | 2 +- lib/mps/random.jl | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/mps/matrixrandom.jl b/lib/mps/matrixrandom.jl index da41a705f..bd0e602bd 100644 --- a/lib/mps/matrixrandom.jl +++ b/lib/mps/matrixrandom.jl @@ -111,7 +111,7 @@ synchronizeStateOnCommandBuffer(kern::MPSMatrixRandomMTGP32, cmdbuf::MTLCommandB @inline function _mpsmat_rand!(randkern::MPSMatrixRandom, dest::MtlArray{T}, ::Type{T2}; - queue::MTLCommandQueue = global_queue(current_device()), + queue::MTLCommandQueue = global_queue(device()), async::Bool=false) where {T,T2} byteoffset = dest.offset * sizeof(T) bytesize = sizeof(dest) diff --git a/lib/mps/random.jl b/lib/mps/random.jl index 7287a1471..7c7982bbe 100644 --- a/lib/mps/random.jl +++ b/lib/mps/random.jl @@ -23,10 +23,10 @@ function RNG(device::MTLDevice, seed::Integer) MPSMatrixRandomPhilox(device, Float32, seed, MPSMatrixRandomUniformDistributionDescriptor(0, 1)), MPSMatrixRandomPhilox(device, Float32, seed, MPSMatrixRandomNormalDistributionDescriptor(0, 1)),) end -@autoreleasepool RNG(seed::Integer) = RNG(current_device(), seed) +@autoreleasepool RNG(seed::Integer) = RNG(device(), seed) RNG(device::MTLDevice) = RNG(device, make_seed()) -@autoreleasepool RNG() = RNG(current_device(), make_seed()) +@autoreleasepool RNG() = RNG(device(), make_seed()) Base.copy(rng::RNG) = RNG(copy(rng.device), copy(rng.uniformInteger), copy(rng.uniformFloat32), copy(rng.normalFloat32)) @@ -41,7 +41,7 @@ Random.seed!(rng::RNG) = Random.seed!(rng, make_seed()) const GLOBAL_RNGs = Dict{MTLDevice,MPS.RNG}() @autoreleasepool function default_rng() - dev = current_device() + dev = device() get!(GLOBAL_RNGs, dev) do RNG(dev) end From 0220ee5d441bce6b213466158e958597e504eb6b 
Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Mon, 22 Jul 2024 12:45:36 -0300 Subject: [PATCH 11/15] Adapt to storage changes --- lib/mps/random.jl | 8 ++++---- src/random.jl | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/mps/random.jl b/lib/mps/random.jl index 7c7982bbe..81ce58585 100644 --- a/lib/mps/random.jl +++ b/lib/mps/random.jl @@ -73,14 +73,14 @@ end # CPU arrays function Random.rand!(rng::RNG, A::AbstractArray{T,N}) where {T <: Union{UniformTypes...}, N} isempty(A) && return A - B = MtlArray{T,N,Shared}(undef, size(A)) + B = MtlArray{T,N,SharedStorage}(undef, size(A)) rand!(rng, B) copyto!(A, unsafe_wrap(Array{T},B)) return A end function Random.randn!(rng::RNG, A::AbstractArray{T,N}) where {T <: Float32, N} isempty(A) && return A - B = MtlArray{T,N,Shared}(undef, size(A)) + B = MtlArray{T,N,SharedStorage}(undef, size(A)) randn!(rng, B) copyto!(A, unsafe_wrap(Array{T},B)) return A @@ -105,5 +105,5 @@ Random.randn(rng::RNG, dim1::Integer, dims::Integer...; storage=DefaultStorageMo Random.randn!(rng, MtlArray{Float32,length(dims) + 1,storage}(undef, dim1, dims...)) # scalars -Random.rand(rng::RNG, T::UniformType=Float32; storage=Shared) = rand(rng, T, 4; storage)[1] -Random.randn(rng::RNG, T::NormalType=Float32; storage=Shared) = randn(rng, T, 4; storage)[1] +Random.rand(rng::RNG, T::UniformType=Float32; storage=SharedStorage) = rand(rng, T, 4; storage)[1] +Random.randn(rng::RNG, T::NormalType=Float32; storage=SharedStorage) = randn(rng, T, 4; storage)[1] diff --git a/src/random.jl b/src/random.jl index 25783dcc7..acb456766 100644 --- a/src/random.jl +++ b/src/random.jl @@ -59,8 +59,8 @@ randn(dim1::Integer, dims::Integer...; storage=DefaultStorageMode) = Random.randn!(mpsrand_rng(), MtlArray{Float32,length(dims) + 1,storage}(undef, dim1, dims...)) # scalars -rand(T::Type=Float32; storage=Shared) = rand(T, 4; storage)[1] -randn(T::Type=Float32; storage=Shared) = randn(T, 
4; storage)[1] +rand(T::Type=Float32; storage=SharedStorage) = rand(T, 4; storage)[1] +randn(T::Type=Float32; storage=SharedStorage) = randn(T, 4; storage)[1] # seeding function seed!(seed=Base.rand(UInt64)) From 05f4752cfcf39eac4a8331f646ff415ff3ee9751 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Wed, 24 Jul 2024 09:08:13 -0300 Subject: [PATCH 12/15] Update docs and clean up tests --- docs/src/usage/array.md | 10 +++++----- test/random.jl | 12 ++++-------- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/docs/src/usage/array.md b/docs/src/usage/array.md index 27c865c12..bd1a9832e 100644 --- a/docs/src/usage/array.md +++ b/docs/src/usage/array.md @@ -119,12 +119,12 @@ Base's convenience functions for generating random numbers are available in Meta ```jldoctest julia> Metal.rand(2) -2-element MtlVector{Float32, Private}: +2-element MtlVector{Float32, Metal.PrivateStorage}: 0.89025915 0.8946847 julia> Metal.randn(Float32, 2, 1) -2×1 MtlMatrix{Float32, Private}: +2×1 MtlMatrix{Float32, Metal.PrivateStorage}: 1.2279074 1.2518331 ``` @@ -138,16 +138,16 @@ standard library: julia> using Random, GPUArrays julia> a = Random.rand(MPS.default_rng(), Float32, 1) -1-element MtlVector{Float32, Private}: +1-element MtlVector{Float32, Metal.PrivateStorage}: 0.89025915 julia> a = Random.rand!(GPUArrays.default_rng(MtlArray), a) -1-element MtlVector{Float32, Private}: +1-element MtlVector{Float32, Metal.PrivateStorage}: 0.0705002 ``` !!! note - `MPSMatrixRandom` functionality requires Metal.jl >= v1.3 + `MPSMatrixRandom` functionality requires Metal.jl >= v2.0 !!! warning `Random.rand!(::MPS.RNG, args...)` and `Random.randn!(::MPS.RNG, args...)` have a framework limitation that requires the byte offset and byte size of the destination array to be a multiple of 4. 
diff --git a/test/random.jl b/test/random.jl index 40aab582a..8889de8ad 100644 --- a/test/random.jl +++ b/test/random.jl @@ -1,7 +1,3 @@ -using Random -using Metal -using Metal: can_use_mpsrandom - const RAND_TYPES = [Float16, Float32, Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64] const RANDN_TYPES = [Float16, Float32] @@ -31,7 +27,7 @@ const OOPLACE_TUPLES = [[(Metal.rand, rand, T) for T in RAND_TYPES]; # specified MPS rng if T != Float16 fill!(A, T(0)) - if can_use_mpsrandom(A) + if Metal.can_use_mpsrandom(A) f(rng, A) @test !iszero(collect(A)) else @@ -51,7 +47,7 @@ const OOPLACE_TUPLES = [[(Metal.rand, rand, T) for T in RAND_TYPES]; # specified MPS rng if T != Float16 fill!(A, T(0)) - if can_use_mpsrandom(A) + if Metal.can_use_mpsrandom(A) f(rng, A) @test Array(A) == fill(1, 0) else @@ -139,7 +135,7 @@ const OOPLACE_TUPLES = [[(Metal.rand, rand, T) for T in RAND_TYPES]; view_A = @view A[idx] # Errors in Julia before crashing whole process - if can_use_mpsrandom(view_A) + if Metal.can_use_mpsrandom(view_A) f(rng, view_A) cpuA = collect(A) @@ -153,7 +149,7 @@ const OOPLACE_TUPLES = [[(Metal.rand, rand, T) for T in RAND_TYPES]; fill!(A, T(0)) idx = 1:51 view_A = @view A[idx] - if can_use_mpsrandom(view_A) + if Metal.can_use_mpsrandom(view_A) f(rng, view_A) cpuA = collect(A) From 69f73ce2843f75c7a2f82b7a727ce20c6ad2b2fc Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Tue, 27 Aug 2024 11:20:30 +0200 Subject: [PATCH 13/15] NFC. 
--- lib/mps/matrixrandom.jl | 2 +- test/mps/copy.jl | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/mps/matrixrandom.jl b/lib/mps/matrixrandom.jl index bd0e602bd..f166159de 100644 --- a/lib/mps/matrixrandom.jl +++ b/lib/mps/matrixrandom.jl @@ -106,7 +106,7 @@ for R in [:MPSMatrixRandomMTGP32, :MPSMatrixRandomPhilox] end end -synchronizeStateOnCommandBuffer(kern::MPSMatrixRandomMTGP32, cmdbuf::MTLCommandBuffer) = +synchronize_state(kern::MPSMatrixRandomMTGP32, cmdbuf::MTLCommandBuffer) = @objc [obj::id{MPSMatrixRandomMTGP32} synchronizeStateOnCommandBuffer:cmdbuf::id{MTLCommandBuffer}]::Nothing diff --git a/test/mps/copy.jl b/test/mps/copy.jl index 0a7bef801..3c3f2ea15 100644 --- a/test/mps/copy.jl +++ b/test/mps/copy.jl @@ -33,7 +33,6 @@ end Ts = Ts[.!(Ts .<: IGNORE_UNION)] @testset "$T" for T in Ts for dim in ((16,16), (10,500), (500,10), (256,512)) - srcMat = MtlArray(rand(T, dim)) dstMat = copytest(srcMat, false, false) From 994cb08716bb1def48d1e2257ea695edc83b32ef Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Tue, 27 Aug 2024 14:44:23 -0300 Subject: [PATCH 14/15] Address review comments --- test/random.jl | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/test/random.jl b/test/random.jl index 8889de8ad..608f03b08 100644 --- a/test/random.jl +++ b/test/random.jl @@ -11,10 +11,6 @@ const OOPLACE_TUPLES = [[(Metal.rand, rand, T) for T in RAND_TYPES]; @testset "in-place" begin rng = Metal.MPS.RNG() - # Seed the default generators to work around value of 0 being - # randomly generated in the size 1 Int8 Array test in 1.11 - Metal.seed!(123) - @testset "$f with $T" for (f, T) in INPLACE_TUPLES @testset "$d" for d in (1, 3, (3, 3), (3, 3, 3), 16, (16, 16), (16, 16, 16), (1000,), (1000,1000)) A = MtlArray{T}(undef, d) @@ -250,8 +246,8 @@ const OOPLACE_TUPLES = [[(Metal.rand, rand, T) for T in RAND_TYPES]; a = f(T, d) Metal.seed!(1) b = f(T, d) - # TODO: Remove once 
https://github.com/JuliaGPU/Metal.jl/issues/331 is fixed - @test iszero(collect(a) - collect(b)) broken = (T == Float16 && d == (1000,1000)) + # TODO: Remove broken parameter once https://github.com/JuliaGPU/GPUArrays.jl/issues/530 is fixed + @test Array(a) == Array(b) broken = (T == Float16 && d == (1000,1000)) end end end # testset From b729034f4518c0b27346284be1e1f0c7812512e6 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Tue, 27 Aug 2024 16:54:02 -0300 Subject: [PATCH 15/15] Correct version in docstring (and trigger CI) --- docs/src/usage/array.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/usage/array.md b/docs/src/usage/array.md index bd1a9832e..0a121df75 100644 --- a/docs/src/usage/array.md +++ b/docs/src/usage/array.md @@ -147,7 +147,7 @@ julia> a = Random.rand!(GPUArrays.default_rng(MtlArray), a) ``` !!! note - `MPSMatrixRandom` functionality requires Metal.jl >= v2.0 + `MPSMatrixRandom` functionality requires Metal.jl >= v1.4 !!! warning `Random.rand!(::MPS.RNG, args...)` and `Random.randn!(::MPS.RNG, args...)` have a framework limitation that requires the byte offset and byte size of the destination array to be a multiple of 4.