diff --git a/src/nditeration.jl b/src/nditeration.jl
index 32d52e26..b933c2be 100644
--- a/src/nditeration.jl
+++ b/src/nditeration.jl
@@ -36,12 +36,12 @@ end
 _ind2sub_recuse(::Tuple{}, ind) = (ind+1,)
 
 function _ind2sub_recurse(indslast::NTuple{1}, ind)
-    @inline
+    Base.@_inline_meta
     (_lookup(ind, indslast[1]),)
 end
 
 function _ind2sub_recurse(inds, ind)
-    @inline
+    Base.@_inline_meta
     inv = inds[1]
     indnext, f, l = _div(ind, inv)
     (ind-l*indnext+f, _ind2sub_recurse(Base.tail(inds), indnext)...)
@@ -54,7 +54,7 @@ function _div(ind, inv::SignedMultiplicativeInverse)
 end
 
 function Base._ind2sub(inv::FastCartesianIndices, ind)
-    @inline
+    Base.@_inline_meta
     _ind2sub_recurse(inv.inverses, ind-1)
 end
 
@@ -151,24 +151,7 @@ Base.length(range::NDRange) = length(blocks(range))
 end
 
 Base.@propagate_inbounds function expand(ndrange::NDRange{N}, groupidx::Integer, idx::Integer) where {N}
-    # This causes two sdiv operations, one for each Linear to CartesianIndex
     return expand(ndrange, blocks(ndrange)[groupidx], workitems(ndrange)[idx])
-
-    # The formulation below saves one sdiv
-    # but leads to a different index order...
-    # previous: julia> expand(ndrange, 1, 32*32)
-    # CartesianIndex(32, 32)
-    # now: julia> expand(ndrange, 1, 32*32)
-    # CartesianIndex(1024, 1)
-    # B = blocks(ndrange)::CartesianIndices
-    # W = workitems(ndrange)::CartesianIndices
-    # Ind = ntuple(Val(N)) do I
-    #     Base.@_inline_meta
-    #     b = B.indices[I]
-    #     w = W.indices[I]
-    #     length(b) * length(w)
-    # end
-    # CartesianIndices(Ind)[(groupidx-1)* prod(size(W)) + idx]
 end
 
 Base.@propagate_inbounds function expand(ndrange::NDRange{N}, groupidx::CartesianIndex{N}, idx::Integer) where {N}