Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
albop committed Jun 1, 2024
1 parent f228dec commit 2f6d73e
Show file tree
Hide file tree
Showing 4 changed files with 160 additions and 74 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41"
FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
Formatting = "59287772-0a20-5a39-b81b-1366585eb4c0"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
Expand All @@ -45,4 +46,3 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
oneAPI = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b"

86 changes: 53 additions & 33 deletions misc/dev_float32.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,72 +16,92 @@ import oneAPI: oneArray
import Adapt: adapt_structure
# import CUDA: CuArray

import Dolo

wk0 = Dolo.time_iteration_workspace(dm32);

# wk = Dolo.time_iteration_workspace(dm32, dest=CuArray)
wk = Dolo.time_iteration_workspace(dm32, dest=oneArray);
@time wk = Dolo.time_iteration_workspace(dm32, dest=oneArray; interp_mode=:cubic);


wk = adapt(oneArray, wk0)
# wk = adapt(CuArray, wk0)
time_iteration(dm32, wk0; engine=:gpu, tol_η=1e-5, improve_wait=3, improve=true)
time_iteration(dm32, wk; engine=:gpu, tol_η=1e-5, improve_wait=3, improve=true)


using KernelAbstractions: get_backend


t_e = get_backend(wk.x0)

using KernelAbstractions: CPU
using StaticArrays
# list available options

# @time wk = Dolo.time_iteration_workspace(dm32, dest=Array; interp_mode=:cubic);

L0 = Dolo.dF_2(dm32, wk0.x0*0, wk0.φ)
L0_gpu = adapt(oneArray, L0)
# @time Dolo.splines.prefilter!(c0, Val(:CPU))
# @time Dolo.splines.prefilter!(c0, Val(:Threads))
# @time Dolo.splines.prefilter!(c0, Val(:KA))

L = deepcopy(L0)
# @time Dolo.splines.prefilter!(c0)
# @time Dolo.splines.prefilter!(c1,Val(:KA))

@time Dolo.dF_2!(L, dm32, wk0.x0, wk0.φ, CPU())
# @time Dolo.splines.prefilter!(c,Val(:KA))

# @time Dolo.dd_2!(L, dm32, wk0.x0, wk0.φ, CPU())
# c_gpu = adapt(Array, c)

# # Dolo.splines.prefilter!(c)

@time Dolo.dF_2!(L0_gpu, dm32, wk.x0, wk.φ, t_e)

Lg = adapt(Array, L0_gpu)

# t_e = get_backend(wk.x0)

@time L*wk0.x0;
# using KernelAbstractions: CPU
# using StaticArrays

#It works !
using BenchmarkTools
@benchmark Dolo.mul!(wk0.r0, L, wk0.x0, CPU())
@benchmark Dolo.mul!(wk.r0, L0_gpu, wk.x0, t_e)
# try

# res = fun_(wk.r0, dm32, wk.x0, wk.φ; ndrange=(p,q))
# catch err
# nothing
# end
# L0 = Dolo.dF_2(dm32, wk0.x0*0, wk0.φ)
# L0_gpu = adapt(oneArray, L0)

wk0 = Dolo.time_iteration_workspace(dm32; interp_mode=:cubic)
# wk = adapt(CuArray, wk0)
wk = adapt(oneArray, wk0)
# L = deepcopy(L0)

@time Dolo.time_iteration(dm32, wk, verbose=true, improve=true; engine=:gpu, improve_wait=10, improve_K=500, T=20);
# @time Dolo.dF_2!(L, dm32, wk0.x0, wk0.φ, CPU())

# # @time Dolo.dd_2!(L, dm32, wk0.x0, wk0.φ, CPU())


# @time Dolo.dF_2!(L0_gpu, dm32, wk.x0, wk.φ, t_e)

@time Dolo.time_iteration(dm32, wk0, verbose=true, improve=false; T=20);
# Lg = adapt(Array, L0_gpu)


# @time L*wk0.x0;

# #It works !
# using BenchmarkTools
# @benchmark Dolo.mul!(wk0.r0, L, wk0.x0, CPU())
# @benchmark Dolo.mul!(wk.r0, L0_gpu, wk.x0, t_e)
# # try

using ForwardDiff
# # res = fun_(wk.r0, dm32, wk.x0, wk.φ; ndrange=(p,q))
# # catch err
# # nothing
# # end

ForwardDiff.derivative(u->unsafe_trunc(u), 0.1)
# wk0 = Dolo.time_iteration_workspace(dm32; interp_mode=:cubic)
# # wk = adapt(CuArray, wk0)
# wk = adapt(oneArray, wk0)

ForwardDiff.jacobian(u->trunc.(Int, u), SVector(0.3, 0.2))
# @time Dolo.time_iteration(dm32, wk, verbose=true, improve=true; engine=:gpu, improve_wait=10, improve_K=500, T=20);

ForwardDiff.jacobian(u->unsafe_trunc.(Int, u), SVector(0.3, 0.2))



# @time Dolo.time_iteration(dm32, wk0, verbose=true, improve=false; T=20);




# using ForwardDiff

# ForwardDiff.derivative(u->unsafe_trunc(u), 0.1)

# ForwardDiff.jacobian(u->trunc.(Int, u), SVector(0.3, 0.2))

# ForwardDiff.jacobian(u->unsafe_trunc.(Int, u), SVector(0.3, 0.2))
144 changes: 105 additions & 39 deletions src/splines/cubic_prefilter.jl
Original file line number Diff line number Diff line change
Expand Up @@ -87,12 +87,19 @@ end

# end

const zq = sqrt(3.0)-2.0
const λ = 6.0
const zq_0 = sqrt(3.0)-2.0
const λ_0 = 6.0

getprecision(g::AbstractArray{T}) where T<:Number = T
getprecision(g::AbstractArray{T}) where T<:SVector = eltype(g).types[1].types[1]


function prefilter!(c::AbstractVector{T}) where T


prec = getprecision(c)
λ = convert(prec, λ_0)
zq = convert(prec, zq_0)

N = length(c)
Horizon = (min(20, N-2))
Expand All @@ -101,7 +108,7 @@ function prefilter!(c::AbstractVector{T}) where T
ff = c[N-1]

t1 = zero(eltype(c)) #0.0
zn = 1.0
zn = convert(prec,1.0)
for n in 0:Horizon
t1 += λ*(2*f1-c[2+n])*zn
zn *= zq
Expand All @@ -126,59 +133,41 @@ function prefilter!(c::AbstractVector{T}) where T
end


function prefilter!(data::AbstractArray{T,2}, ::Val{:CPU}) where T

# function prefilter!(data::AbstractVector{T}) where T

# # TODO: find a way to avoid allocation

# # data is 1d array of length M
# # data is 1d array of length M+2
# M = length(data) - 2
# bands = zeros(M+2, 3)

# bb = zeros(T, M+2)

# # fill_bands!(M, bands, bb, data)
# prefilter!(data, bands, bb)

# end

# function prefilter!(data, bands, bb)

# M = length(data) - 2
# fill_bands!(M, bands, bb, data)
# solve_coefficients!(bands, bb, data)

# end

I,J = size(data)

M = J-2
for i=1:I
dat = view(data, i, :)
prefilter!(dat)
end

# function prefilter!(data::Array{T,1}) where T
# M = size(data,1)-2
# bands = zeros(M+2,3)
# bb = zeros(T,M+2)
# fill_bands!(M, bands, bb, data)
# prefilter!(data, bands, bb)
# end
M = I-2
for j=1:J
dat = view(data, :, j)
prefilter!(dat)
end
end

function prefilter!(data::AbstractArray{T,2}) where T
function prefilter!(data::AbstractArray{T,2}, ::Val{:Threads}) where T

I,J = size(data)

M = J-2
for i=1:I
Threads.@threads for i=1:I
dat = view(data, i, :)
prefilter!(dat)
end

M = I-2
for j=1:J
Threads.@threads for j=1:J
dat = view(data, :, j)
prefilter!(dat)
end
end

function prefilter!(data::Array{T,3}) where T
function prefilter!(data::AbstractArray{T,3}) where T

I,J,K = size(data)

Expand Down Expand Up @@ -209,7 +198,7 @@ function prefilter!(data::Array{T,3}) where T
end


function prefilter!(data::Array{T,4}) where T
function prefilter!(data::AbstractArray{T,4}) where T
I,J,K,L = size(data)

M = L-2
Expand Down Expand Up @@ -250,3 +239,80 @@ function prefilter!(data::Array{T,4}) where T
end
end
end

####

function prefilter!(data::AbstractArray{T,1}, ::Val{:CPU}) where T

prefilter!(data)

end

using KernelAbstractions: @kernel


function prefilter!(data::AbstractArray{T,1}, ::Val{:KA}) where T

@kernel function ker(m)

prefilter!(m)

end

backend = get_backend(data)

fun = ker(backend)

fun(data, ndrange=(1,))

end

using KernelAbstractions


using KernelAbstractions: CPU


function prefilter!(data::AbstractArray{T,2}, ::Val{:KA}) where T


@kernel function ker_1(m)

i = @index(Global, Linear)

dat = view(m, i, :)
prefilter!(dat)

end

@kernel function ker_2(m)

j = @index(Global, Linear)

dat = view(m, :, j)
prefilter!(dat)

end


backend = get_backend(data)

if backend==CPU()
# Not clear how to set number of threads here
fun_1 = ker_1(backend, 8)
fun_2 = ker_2(backend, 8)
else
fun_1 = ker_1(backend)
fun_2 = ker_2(backend)
end

I,J = size(data)

fun_1(data, ndrange=(I,))
fun_2(data, ndrange=(J,))

end

using oneAPI: oneArray

prefilter!(data::oneArray) = prefilter!(data, Val(:KA))
2 changes: 1 addition & 1 deletion src/splines/splines.jl
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ end
fill!(spl.θ, zero(eltype(spl.θ)))
ind = tuple( (2:(e[3]+1) for e in spl.grid )...)
spl.θ[ind...] .= rhs
splines.prefilter!(spl.θ)
prefilter!(spl.θ, Val(:KA))
elseif k==1
spl.θ .= rhs
end
Expand Down

0 comments on commit 2f6d73e

Please sign in to comment.