Skip to content

Commit

Permalink
Merge pull request #7 from CliMA/ck/improve_cpu_perf
Browse files Browse the repository at this point in the history
Improve CPU performance implementation
  • Loading branch information
charleskawczynski authored Mar 11, 2024
2 parents 44c58ed + 1760137 commit 1b2a3de
Showing 1 changed file with 14 additions and 2 deletions.
16 changes: 14 additions & 2 deletions test/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,25 @@ if !hasmethod(Base.copyto!, Tuple{<:FusedMultiBroadcast})
end
end

# This is better than the baseline.
"""
    copyto_cpu!(pairs, ei)

For every `(dest, bc)` entry of `pairs`, assign `dest[i] = bc[i]` for each index `i`
in `ei`, then return `nothing`.

# Arguments
- `pairs`: an iterable whose entries destructure into a destination and a source,
  e.g. a tuple of `Pair(dest, bc)`.
- `ei`: the indices to copy; must be an iterator supported by `@simd`
  (e.g. a range or `CartesianIndices`).

The inner loop runs under `@inbounds`, so the caller is responsible for `ei` holding
only valid indices of every `dest` and `bc`.
"""
function copyto_cpu!(pairs::T, ei::EI) where {T, EI}
    # One full pass over `ei` per destination/source entry.
    for (dest, bc) in pairs
        # `ivdep` asserts there are no loop-carried memory dependencies, i.e. writing
        # `dest[i]` never affects the value read from `bc[j]` for another index `j`.
        @inbounds @simd ivdep for i in ei
            dest[i] = bc[i]
        end
    end
    return nothing
end

# This should, in theory, be better, but it seems like inlining is
# failing somewhere.
# function copyto_cpu!(pairs::T, ei::EI) where {T, EI}
# @inbounds @simd ivdep for i in ei
# MBF.rcopyto_at!(pairs, i)
# end
# return nothing
# end

import CUDA
import Adapt
function copyto_cuda!(pairs::Tuple) # (Pair(dest1, bc1),Pair(dest2, bc2),...)
Expand Down

0 comments on commit 1b2a3de

Please sign in to comment.