From 596e534912da3a3af55b2b8e9644ebcc013f9c2a Mon Sep 17 00:00:00 2001 From: Pablo Winant Date: Tue, 7 May 2024 20:21:04 +0200 Subject: [PATCH 01/17] WIP --- Project.toml | 4 +- misc/test_gpu | 0 misc/test_gpu.jl | 340 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 341 insertions(+), 3 deletions(-) create mode 100644 misc/test_gpu create mode 100644 misc/test_gpu.jl diff --git a/Project.toml b/Project.toml index 35ec5134..7a3a4786 100644 --- a/Project.toml +++ b/Project.toml @@ -7,13 +7,13 @@ Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" AxisArrays = "39de3d68-74b9-583c-8d2d-e117c070f3a9" BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" BlockDiagonals = "0a1fb500-61f7-11e9-3c65-f5ef3456f9f0" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" Continuables = "79afa230-ca09-11e8-120b-5decf7bf5e25" Crayons = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f" DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" Dolang = "e5c7262c-e9d2-5620-ad8e-1af14eb8a8e3" -Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" Formatting = "59287772-0a20-5a39-b81b-1366585eb4c0" @@ -35,7 +35,6 @@ Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" ProgressLogging = "33c8b6b6-d38a-422a-b730-caa89a2f386c" RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" ResumableFunctions = "c5292f4c-5179-55e1-98c5-05642aab7184" -ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" Roots = "f2b01f46-fcfa-551c-844a-d8ac1e96c665" SIMD = "fdea26ae-647d-5447-a871-4b548cad5224" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" @@ -43,4 +42,3 @@ StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" StringDistances = "88034a9c-02f8-509d-84a9-84ec65e18404" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6" -oneAPI = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b" diff --git a/misc/test_gpu b/misc/test_gpu new file mode 100644 index 00000000..e69de29b diff --git a/misc/test_gpu.jl b/misc/test_gpu.jl new file mode 100644 index 00000000..8866f912 --- /dev/null +++ b/misc/test_gpu.jl @@ -0,0 +1,340 @@ +using Dolo +using StaticArrays +using BenchmarkTools +import Dolo.splines: CubicInterpolator +using LoopVectorization +# using CUDAKernels +using KernelAbstractions +using KernelAbstractions: synchronize +using KernelAbstractions: @kernel, @index, @Const +using Adapt: adapt +using CUDA +import Adapt: adapt_structure, adapt +import Adapt + +# using CUDAKernels +using KernelAbstractions +using KernelAbstractions: @kernel, @index, @Const +using CUDA + + +grid = ( + (0.0,1.0,20), + (0.0,1.0,20), + (0.0,1.0,20) +) + + +# C = rand(SVector{2, Float64}, 22,22,22) +C = rand(22,22,22) + +spl = Dolo.SplineInterpolator{typeof(grid), typeof(C), 3}(grid, C) + +# function test(spl; N=100000) + +# out = zeros(eltype(spl.θ), N) +# for n=1:N +# t = sin(n)/2.0+1.0 +# x = SVector(t,1-t,t^2) +# r = spl(x) +# out[n] = r +# end + +# return sum(out) + +# end + + +# @benchmark test(spl) + + +# function test2(spl; N=100000) + +# out = zeros(N) +# @tturbo for n=1:N +# t = sin(n)/2.0+1.0 +# x = SVector(t,1-t,t^2) +# r = spl(x) +# out[n] = r +# # out[n] = r +# end + +# return sum(out) + +# end + +# @benchmark test2(spl) + +# # using CUDAKernels + +# @kernel function ker_(out,(spl)) +# n = @index(Global) +# t = sin(n)/2.0+1.0 +# x = SVector(t,1-t,t^2) +# r = spl(x) +# out[n] = r +# end + +# function test3(spl; N=100000) + +# out = zeros(N) + +# # backend = get_backend(out) + +# kernel! = ker_(CPU()) + +# done = kernel!(out, spl, ndrange=N) +# wait(done) + +# return sum(out) + +# end + +# @benchmark test3(spl) + + +# function test4(spl; N=100000) + +# out = CUDA.zeros(eltype(spl.θ), N) + +# kernel! = ker_(CUDADevice()) + +# done = kernel!(out, spl, ndrange=N) +# wait(done) + +# return sum(out) + +# end + + +adapt_structure(to, s::Dolo.SplineInterpolator{G,C,k}) where G where C where k = let + θθ = adapt(to, s.θ) + CC = typeof(θθ) + Dolo.SplineInterpolator{G,CC,k}(s.grid, θθ) +end + + +gspl = adapt(CuArray, spl) + +# test4(gspl) + +# @benchmark test4(gspl) + + +function orphan(model::DoloYAML.Model{ID, Dom}) where ID where Dom + P = typeof(model.parameters) + DoloYAML.DummyModel{ID, Dom, P}(model.parameters) +end + +function orphan(model::Dolo.YModel) + Dolo.YModel( + Dolo.name(model), + model.states, + model.controls, + model.exogenous, + model.calibration, + orphan(model.source) + ) +end + + +function orphan(model::Dolo.DYModel) + Dolo.DYModel( + orphan(model.model), + model.grid, + model.dproc + ) +end + + + +root_dir = pkgdir(Dolo) +model = include("$(root_dir)/examples/ymodels/rbc_mc.jl") + + +dmodel = Dolo.discretize(model) + + + +# model2 = orphan(model) +# dmodel2 = orphan(dmodel) + + +x0 = Dolo.GVector(dmodel.grid, [Dolo.calibrated(model, :controls) for i=1:length(dmodel.grid)]) + +φ = Dolo.DFun(model, x0) + +Dolo.F(dmodel, x0, φ) + +Dolo.calibrated(model, :controls) + +import Adapt: adapt_storage + +function adapt_structure(to, v::Dolo.GVector{G,V}) where G where V + data = adapt(to, v.data) + Dolo.GVector{G, typeof(data)}( + v.grid, + data + ) +end +function adapt_structure(to, f::Dolo.DFun{Dom, Gar, Itp, vars}) where Dom where Gar where Itp where vars + itp = adapt(to, f.itp) + values = adapt(to, f.values) + Dolo.DFun{Dom, typeof(values), typeof(itp), vars}( + f.domain, + values, + itp + ) +end + +@kernel function ker_F!(out, dmodel, x0, φ) + + n = @index(Global, Linear) + + i,j = Dolo.from_linear(dmodel.grid, n) + # I = @index(Global, Cartesian) + + s0 = Dolo.QP( (i,j), dmodel.grid[n]) + x = x0[n] + + r = Dolo.F(dmodel, s0, x, φ) + + out[n] = r + + # @synchronize +end + +# tt = typeof(model) + + +# struct MyType{A,B} +# a::A +# b::B +# end + +# mm = MyType((1,2,3),(:a, :b)) + + +# source = :(MyType{Tuple{Int64, Int64, Int64}, Tuple{Symbol, Symbol}}) +# dest = :(MyType{Tuple{Int64, Int64, Int64}}) + + +# t_source = eval(source) +# t_dest = eval(dest) + + +function F_cpu(dmodel, x0, φ, K=1000) + + backend = CPU() + + r0 = x0*0 # TODO + + # kernel! = ker_F(CUDADevice()) + kernel! = ker_F!(backend) + + # N = length(dmodel.grid) + N1 = length(dmodel.grid.g1) + N2 = length(dmodel.grid.g2) + for k=1:K + kernel!(r0, dmodel, x0, φ; ndrange=(N1,N2)) + end + + # @synchronize + r0 = adapt(Array, r0) + + return sum(sum(r0.data)) + +end + +@time res = F_cpu(dmodel, x0, φ); + + + +function F_gpu(dmodel, x0, φ, K=1000) + + backend = CUDABackend() + r0 = x0*0 # TODO + r0_gpu = adapt(CuArray, r0) # TODO + x0_gpu = adapt(CuArray, x0) # TODO + φ_gpu = adapt(CuArray, φ) + + # kernel! = ker_F(CUDADevice()) + kernel! = ker_F!(backend) + + # N = length(dmodel.grid) + N1 = length(dmodel.grid.g1) + N2 = length(dmodel.grid.g2) + + for k=1:K + kernel!(r0_gpu, dmodel, x0_gpu, φ_gpu, ndrange=(N1,N2)) + end + r0 = adapt(Array, r0_gpu) + return sum(sum(r0)) + +end + +@time res1 = F_cpu(dmodel, x0, φ); +@time res2 = F_gpu(dmodel, x0, φ); + + + + + + + + +function F_fast!(r0, dmodel, x0, φ) + + dm = orphan(dmodel) + backend = KernelAbstractions.get_backend(x0.data) + + @kernel function ker_FF!(out, dmodel, x0, φ) + + n = @index(Global, Linear) + + i,j = Dolo.from_linear(dmodel.grid, n) + + s0 = Dolo.QP( (i,j), dmodel.grid[n]) + x = x0[n] + r = Dolo.dF_1(dmodel, s0, x, φ) + out[n] = r + end + + kernel! = ker_FF!(backend) + + N = length(dm.grid) + kernel!(r0, dm, x0, φ, ndrange=N) + +end + + + +r0 = Dolo.dF_1(dmodel, x0, φ) +r0_gpu = Adapt.adapt(CuArray,r0) +φ_gpu = Adapt.adapt(CuArray,φ) +x0_gpu = Adapt.adapt(CuArray,x0) + + +function timeit(r0, φ, x0; K=1000) + for k=1:K + F_fast!(r0, dmodel, x0, φ) + end + sum( sum(r0.data) ) +end + +@time timeit(r0, φ, x0; K=1) +@time timeit(r0_gpu, φ_gpu, x0_gpu; K=1) + +@time sum(r0_gpu.data) + + +function timeit_FF(r0, φ, x0; K=1000) + for k=1:K + FF!(r0, dmodel, x0, φ) + end + sum( sum(r0.data) ) +end + +@time timeit_FF(r0, φ, x0; K=1) + +@profview timeit_FF(r0, φ, x0; K=1) From cb30d7a178981a2ba7151a4a46b8b778d19e95a4 Mon Sep 17 00:00:00 2001 From: Pablo Winant Date: Mon, 20 May 2024 16:00:23 +0200 Subject: [PATCH 02/17] WIP --- Project.toml | 4 ++++ src/dolo_model.jl | 7 +++++++ src/grids.jl | 10 +++++----- src/processes.jl | 15 ++++++++------- src/space.jl | 34 +++++++++++++++++----------------- 5 files changed, 41 insertions(+), 29 deletions(-) diff --git a/Project.toml b/Project.toml index 7a3a4786..971d1891 100644 --- a/Project.toml +++ b/Project.toml @@ -3,6 +3,7 @@ uuid = "9d24351c-2990-5e1b-a277-04c4b809c898" version = "0.5.0-alpha" [deps] +AbstractDifferentiation = "c29ec348-61ec-40c8-8164-b8c60e9d9f3d" Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" AxisArrays = "39de3d68-74b9-583c-8d2d-e117c070f3a9" BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" @@ -14,6 +15,7 @@ DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" Dolang = "e5c7262c-e9d2-5620-ad8e-1af14eb8a8e3" +Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" Formatting = "59287772-0a20-5a39-b81b-1366585eb4c0" @@ -42,3 +44,5 @@ StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" StringDistances = "88034a9c-02f8-509d-84a9-84ec65e18404" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6" +Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" +oneAPI = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b" diff --git a/src/dolo_model.jl b/src/dolo_model.jl index 6e37972a..3cb0e7ba 100644 --- a/src/dolo_model.jl +++ b/src/dolo_model.jl @@ -15,6 +15,13 @@ name(::YModel{C,A,B,D,N}) where C where A where B where D where N = N bounds(model::YModel, s) = model.controls + +# TODO: check somewhere that the type of +# states/controls/exogenous +# is the same as calibration + +eltype(model::YModel) = eltype(model.calibration) + function recalibrate(model::YModel; kwargs...) calib = merge(model.calibration, kwargs) YModel(model.states, model.controls, model.exogenous, calib, model.source) diff --git a/src/grids.jl b/src/grids.jl index f514eda3..3d033802 100644 --- a/src/grids.jl +++ b/src/grids.jl @@ -6,8 +6,8 @@ import Base: eltype, iterate, size eltype(cg::AGrid{d}) where d = SVector{d, Float64} ndims(cg::AGrid{d}) where d = d -struct CGrid{d} <: AGrid{d} - ranges::NTuple{d, Tuple{Float64, Float64, Int64}} +struct CGrid{d,Tf} <: AGrid{d} + ranges::NTuple{d, Tuple{Tf, Tf, Int64}} end size(cg::CGrid{d}) where d = tuple((e[3] for e in cg.ranges)... ) @@ -39,8 +39,8 @@ getindex(g::CGrid{1}, ::Colon) = [SVector(i) for i in range(g.ranges[1]...)] -struct SGrid{n,d} <: ASGrid{d} - points::SVector{n,SVector{d, Float64}} +struct SGrid{n,d,T} <: ASGrid{d} + points::SVector{n,SVector{d, T}} end function SGrid(Q::Matrix) @@ -134,7 +134,7 @@ end iterate(s::SGrid) = (s.points[1], 2) iterate(s::SGrid, state) = state<=length(s) ? (s.points[state], state+1) : nothing -eltype(cg::CGrid{d}) where d = SVector{d, Float64} +eltype(cg::CGrid{d,Tf}) where d where Tf = SVector{d, Tf} function iterate(cg::CGrid{d}) where d diff --git a/src/processes.jl b/src/processes.jl index 44f4c461..d06e22fa 100644 --- a/src/processes.jl +++ b/src/processes.jl @@ -302,9 +302,9 @@ end discretize(var::VAR1, d::Dict) = length(d)>=1 ? discretize(var, d[:n]) : discretize(var) -struct MarkovChain{names, d, d2, k} - P::SMatrix{d,d,Float64,d2} - Q::SVector{d, SVector{k, Float64}} +struct MarkovChain{names, d, d2, k, Tf} + P::SMatrix{d,d,Tf,d2} + Q::SVector{d, SVector{k, Tf}} end variables(mc::MarkovChain{names}) where names = names @@ -313,14 +313,15 @@ ndims(mc::MarkovChain{names}) where names = length(names) # MarkovChain(names, P, Q) = MarkovChain{names, typeof(P), typeof(Q)}(P,Q) function MarkovChain(names, P::Matrix, Q::Matrix) + Tf = eltype(P) d = size(P,1) - sm = SMatrix{d,d,Float64,d*d}(P) + sm = SMatrix{d,d,Tf,d*d}(P) sv = SVector( tuple( ( SVector(Q[i,:]...) for i in 1:size(Q,1))...) ) - MarkovChain{names,d,d^2,length(sv[1])}(sm,sv) + MarkovChain{names,d,d^2,length(sv[1]),Tf}(sm,sv) end -MarkovChain(names, P::SMatrix, Q::SVector{d,SVector{k,Float64}}) where d where k = MarkovChain{names, size(P,1), length(P), length(Q[1])}(P, Q) # TODO: specify type arguments -function MarkovChain(P::SMatrix, Q::SVector{d,SVector{k,Float64}}) where d where k +MarkovChain(names, P::SMatrix, Q::SVector{d,SVector{k,Tf}}) where d where k where Tf = MarkovChain{names, size(P,1), length(P), length(Q[1]),Tf}(P, Q) # TODO: specify type arguments +function MarkovChain(P::SMatrix, Q::SVector{d,SVector{k,Tf}}) where d where k where Tf names = tuple((Symbol(string("e", i)) for i=1:k)...) MarkovChain(names, P, Q) end diff --git a/src/space.jl b/src/space.jl index 4e6a3083..33a0332d 100644 --- a/src/space.jl +++ b/src/space.jl @@ -1,31 +1,31 @@ abstract type Space{d} end -struct CartesianSpace{d,dims} +struct CartesianSpace{d,dims,Tf} # names::NTuple{d, Symbol} - min::NTuple{d, Float64} - max::NTuple{d, Float64} + min::NTuple{d, Tf} + max::NTuple{d, Tf} end const CSpace = CartesianSpace -CartesianSpace(a::Tuple{Float64}, b::Tuple{Float64}) = CartesianSpace{length(a), (:x,)}(a,b) -CartesianSpace(a::Tuple{Float64, Float64}, b::Tuple{Float64, Float64}) = CartesianSpace{length(a), (:x_1, :x_2)}(a,b) +CartesianSpace(a::Tuple{Tf}, b::Tuple{Tf}) where Tf = CartesianSpace{length(a), (:x,), Tf}(a,b) +CartesianSpace(a::Tuple{Tf, Tf}, b::Tuple{Tf, Tf}) where Tf = CartesianSpace{length(a), (:x_1, :x_2), Tf}(a,b) -function CartesianSpace(kwargs::Pair{Symbol, Tuple{Float64, Float64}}...) +function CartesianSpace(kwargs::Pair{Symbol, Tuple{Tf, Tf}}...) where Tf names = tuple( (e[1] for e in kwargs)... ) a = tuple( (v[2][1] for v in values(kwargs))... ) b = tuple( (v[2][2] for v in values(kwargs))... ) d = length(names) - return CartesianSpace{d, names}(a,b) + return CartesianSpace{d, names, Tf}(a,b) end CartesianSpace(;kwargs...) = CartesianSpace(kwargs...) -getindex(cs::CartesianSpace{d}, ind::SVector{d, Float64}) where d = ind +getindex(cs::CartesianSpace{d}, ind::SVector{d, Tf}) where d where Tf= ind import Base: in @@ -46,15 +46,15 @@ ndims(dom::CartesianSpace{d, dims}) where d where dims = d variables(dom::CartesianSpace{d,t}) where d where t = t dims(dom::CartesianSpace) = variables(dom) -struct GridSpace{N,d,dims} - points::SVector{N,SVector{d,Float64}} +struct GridSpace{N,d,dims,Tf} + points::SVector{N,SVector{d,Tf}} end const GSpace = GridSpace -GridSpace(v::SVector{N, SVector{d, Float64}}) where d where N = GridSpace{length(v), d, (:i_,)}(SVector(v...)) -GridSpace(v::Vector{SVector{d, Float64}}) where d = GridSpace{length(v), d, (:i_,)}(SVector(v...)) -GridSpace(names, v::SVector{k, SVector{d, Float64}}) where k where d = GridSpace{length(v), d, names}(v) +GridSpace(v::SVector{N, SVector{d, Tf}}) where d where N where Tf = GridSpace{length(v), d, (:i_,), Tf}(SVector(v...)) +GridSpace(v::Vector{SVector{d, Tf}}) where d where Tf = GridSpace{length(v), d, (:i_,), Tf}(SVector(v...)) +GridSpace(names, v::SVector{k, SVector{d, Tf}}) where k where d where Tf = GridSpace{length(v), d, names, Tf}(v) getindex(gs::GridSpace, i::Int64) = gs.points[i] @@ -111,8 +111,8 @@ function dropnames(namedtuple::NamedTuple, names::Tuple{Vararg{Symbol}}) end -QP(space::CartesianSpace{d}; values...) where d = let - s_ =zero(SVector{d,Float64})*NaN +QP(space::CartesianSpace{d, Tf}; values...) where d where Tf = let + s_ =zero(SVector{d,Tf})*NaN s0 = QP(s_,s_) QP(space, s0; values...) end @@ -136,10 +136,10 @@ QP(space::GridSpace{d}; i_=1) where d = QP(i_,space[i_]) -function QP(space::ProductSpace{<:GridSpace{d1},<:CartesianSpace{d2}}; values...) where d1 where d2 +function QP(space::ProductSpace{<:GridSpace{d1},<:CartesianSpace{d2, Tf}}; values...) where d1 where d2 where Tf i0 = get(values, :i_, 1) m_ = space.spaces[1][i0] - s_ =zero(SVector{d2,Float64})*NaN + s_ =zero(SVector{d2,Tf})*NaN s0 = QP((i0,s_),SVector(m_...,s_...)) QP(space, s0; values...) end From 33dad289c3c54b7c4cc4cfabe2efc6ea8673fbab Mon Sep 17 00:00:00 2001 From: Pablo Winant Date: Mon, 20 May 2024 16:00:51 +0200 Subject: [PATCH 03/17] WIP --- misc/dev_float32.jl | 7 ++ misc/rbc_base.jl | 179 ++++++++++++++++++++++++++++++++++++++++++++ misc/rbc_float32.jl | 89 ++++++++++++++++++++++ 3 files changed, 275 insertions(+) create mode 100644 misc/dev_float32.jl create mode 100644 misc/rbc_base.jl create mode 100644 misc/rbc_float32.jl diff --git a/misc/dev_float32.jl b/misc/dev_float32.jl new file mode 100644 index 00000000..765415ae --- /dev/null +++ b/misc/dev_float32.jl @@ -0,0 +1,7 @@ +using Dolo + +root_dir = pkgdir(Dolo) +model = include("$(root_dir)/examples/ymodels/rbc_mc.jl") + +dm = Dolo.discretize(model, Dict(:endo=>[10000000]) ) + diff --git a/misc/rbc_base.jl b/misc/rbc_base.jl new file mode 100644 index 00000000..52543b17 --- /dev/null +++ b/misc/rbc_base.jl @@ -0,0 +1,179 @@ + +using StaticArrays +import Dolo: transition, arbitrage +import Dolo + +model = let + + name = :rbc_mc + + # calibrate some parameters + β = 0.99 + σ = 5 + η = 1 + δ = 0.025 + α = 0.33 + ρ = 0.8 + zbar = 0.0 + σ_z = 0.016 + n = 0.33 + z = zbar + rk = 1/β - 1+δ + k = n/(rk/α)^(1/(1-α)) + w = (1-α)*exp(z)*(k/n)^α + y = exp(z)*k^α*n^(1-α) + i = δ*k + c = y - i + χ = w/c^σ/n^η + + calibration = (;β, σ, η, δ, α, ρ, z, n, k, w, y, i, c, χ) + + + P = @SMatrix [0.4 0.6; 0.6 0.4] + # Q = @SMatrix [-0.01; 0.01] + Q = SVector( SVector(-0.01), SVector(0.01) ) + + # P = @SMatrix [1.0;] + # Q = SVector( (SVector(0.0),) ) + + process = Dolo.MarkovChain( (:z,), P, Q ) + + states = Dolo.ProductSpace( + Dolo.GridSpace((:z,), Q), + Dolo.CartesianSpace(; + :k => ( k*0.9, k*1.1), + ) + ) + controls = Dolo.CartesianSpace(; + :i => (0.0, 10.0), + :n => (0.0, 1.5) + ) + + # calibration = (;α, β, γ, δ, ρ,χ, η = 2.0, σ = 2.0, i=0.1, n=0.8) + + + Dolo.YModel(name, states, controls, process, calibration) + +end + + +using LoopVectorization +dmodel = Dolo.discretize(model, Dict(:endo=>10000000)) + +(;φ, x0) = Dolo.time_iteration_workspace(dmodel) + +function FF!(r0, dm, x0, φ) + + MM = length(dm.grid.g1) + a,b,NN = dm.grid.g2.ranges[1] + g1 = dm.grid.g1 + g2 = dm.grid.g2 + + p = dm.model.calibration + + # (;β, σ, η, δ, α, ρ, χ) = dm.model.calibration + + P = dm.model.exogenous.P + + itps = φ.itp + + for i_m=1:MM + + z = g1.points[i_m][1] + + for n_=1:NN + + i,n = x0[i_m, n_] + + k = g2[n_][1] # a+(b-a)*(n_-1)/(N-1) + + y = exp(z)*(k^p.α)*(n^(1-p.α)) + w = (1-p.α)*y/n + + c = y - i + + res1 = 0.0 + res2 = 0.0 + + for i_M=1:MM + + prob = P[i_m,i_M] + Z = g1.points[i_M][1] + + + itp = itps[i_M] + + + K = k*(1-p.δ) + i + + I,N = itp(K) + Y = exp(Z)*(K^p.α)*(N^(1-p.α)) + C = Y - I + + res1 += prob*(p.χ*(n^p.η)*(c^p.σ) - w) + res2 += prob*(p.β*(C/c)^(-p.σ)*(1-p.δ+exp(Z)*p.α*K^(p.α-1)*N^(1-p.α))-1) + + end + + r0[i_m,n_] = SVector(res1,res2) + + end + end + +end + + +function Dolo.transition(model::typeof(model), s::NamedTuple, x::NamedTuple, M::NamedTuple) + + (;δ, ρ) = model.calibration + + # Z = e.Z + K = s.k * (1-δ) + x.i + + (;k=K,) ## This is only the endogenous state + +end + + + +function intermediate(model::typeof(model),s::NamedTuple, x::NamedTuple) + + p = model.calibration + + y = exp(s.z)*(s.k^p.α)*(x.n^(1-p.α)) + w = (1-p.α)*y/x.n + rk = p.α*y/s.k + c = y - x.i + return ( (; y, c, rk, w)) + +end + + +function arbitrage(model::typeof(model), s::NamedTuple, x::NamedTuple, S::NamedTuple, X::NamedTuple) + + p = model.calibration + + y = intermediate(model, s, x) + Y = intermediate(model, S, X) + res_1 = p.χ*(x.n^p.η)*(y.c^p.σ) - y.w + res_2 = (p.β*(y.c/Y.c)^p.σ)*(1 - p.δ + Y.rk) - 1 + + return ( (;res_1, res_2) ) + +end + + +r0 = deepcopy(x0*0) +r1 = deepcopy(x0*0) + + +using BenchmarkTools + + +# not faster ! +@time FF!(r0, dmodel, x0, φ) +@time Dolo.F!(r1, dmodel, x0, φ) + +@benchmark FF!(r0, dmodel, x0, φ) + +@benchmark Dolo.F!(r1, dmodel, x0, φ) \ No newline at end of file diff --git a/misc/rbc_float32.jl b/misc/rbc_float32.jl new file mode 100644 index 00000000..a8fc0986 --- /dev/null +++ b/misc/rbc_float32.jl @@ -0,0 +1,89 @@ +using StaticArrays +import Dolo: transition, arbitrage +import Dolo + +model = let + + name = :rbc_mc + + # calibrate some parameters + β = 0.99 + σ = 5.0 + η = 1.0 + δ = 0.025 + α = 0.33 + ρ = 0.8 + zbar = 0.0 + σ_z = 0.016 + n = 0.33 + z = zbar + rk = 1/β - 1+δ + k = n/(rk/α)^(1/(1-α)) + w = (1-α)*exp(z)*(k/n)^α + y = exp(z)*k^α*n^(1-α) + i = δ*k + c = y - i + χ = w/c^σ/n^η + + calibration = (;β, σ, η, δ, α, ρ, z, n, k, w, y, i, c, χ) + + + P = @SMatrix [0.4 0.6; 0.6 0.4] + # Q = @SMatrix [-0.01; 0.01] + Q = SVector( SVector(-0.01), SVector(0.01) ) + + # P = @SMatrix [1.0;] + # Q = SVector( (SVector(0.0),) ) + + process = Dolo.MarkovChain( (:z,), P, Q ) + + states = Dolo.ProductSpace( + Dolo.GridSpace((:z,), Q), + Dolo.CartesianSpace(; + :k => ( k*0.9, k*1.1), + ) + ) + controls = Dolo.CartesianSpace(; + :i => (0.0, 10.0), + :n => (0.0, 1.5) + ) + + # calibration = (;α, β, γ, δ, ρ,χ, η = 2.0, σ = 2.0, i=0.1, n=0.8) + + + Dolo.YModel(name, states, controls, process, calibration) + +end + +function convert_model(T, model) + + calibration = NamedTuple( ((a,T(b)) for (a,b) in pairs(model.calibration) ) ) + + fun = u->T(u) + P = fun.(model.exogenous.P) + Q = SVector((fun.(e) for e in model.exogenous.Q)...) + + vars = Dolo.variables(model.exogenous) + exogenous = Dolo.MarkovChain(vars, P, Q) + + Dolo.YModel( + names, + model.states, + model.controls, + exogenous, + calibration + ) +end + +model32 = convert_model(Float32,model) + +dmodel = Dolo.discretize(model) + + +dmodel32 = Dolo.discretize(model32) + +wksp = Dolo.time_iteration_workspace(dmodel32) + +(;x0,φ) = wksp + +Dolo.time_iteration(model) \ No newline at end of file From d7d1806c9f8c5e7d6d2ddd43b0eb9fb160a80b6b Mon Sep 17 00:00:00 2001 From: Pablo Winant Date: Mon, 20 May 2024 19:10:16 +0200 Subject: [PATCH 04/17] WIP --- misc/dev_float32.jl | 1 + misc/rbc_base.jl | 8 ++--- misc/rbc_float32.jl | 28 +++++---------- misc/test_gpu.jl | 8 ++--- src/Dolo.jl | 4 +++ src/algos/simul.jl | 6 ++-- src/algos/time_iteration.jl | 4 ++- src/algos/time_iteration_accelerated.jl | 12 +++---- src/conversion.jl | 39 +++++++++++++++++++++ src/dolo_model.jl | 4 +++ src/funs.jl | 12 +++---- src/garray.jl | 16 ++++----- src/grids.jl | 45 ++++++++++++++----------- 13 files changed, 115 insertions(+), 72 deletions(-) create mode 100644 src/conversion.jl diff --git a/misc/dev_float32.jl b/misc/dev_float32.jl index 765415ae..e7f54566 100644 --- a/misc/dev_float32.jl +++ b/misc/dev_float32.jl @@ -5,3 +5,4 @@ model = include("$(root_dir)/examples/ymodels/rbc_mc.jl") dm = Dolo.discretize(model, Dict(:endo=>[10000000]) ) +Dolo.convert \ No newline at end of file diff --git a/misc/rbc_base.jl b/misc/rbc_base.jl index 52543b17..a29612cd 100644 --- a/misc/rbc_base.jl +++ b/misc/rbc_base.jl @@ -64,10 +64,10 @@ dmodel = Dolo.discretize(model, Dict(:endo=>10000000)) function FF!(r0, dm, x0, φ) - MM = length(dm.grid.g1) - a,b,NN = dm.grid.g2.ranges[1] - g1 = dm.grid.g1 - g2 = dm.grid.g2 + MM = length(dm.grid.grids[1]) + a,b,NN = dm.grid.grids[2].ranges[1] + g1 = dm.grid.grids[1] + g2 = dm.grid.grids[2] p = dm.model.calibration diff --git a/misc/rbc_float32.jl b/misc/rbc_float32.jl index a8fc0986..3958bab9 100644 --- a/misc/rbc_float32.jl +++ b/misc/rbc_float32.jl @@ -55,35 +55,23 @@ model = let end -function convert_model(T, model) - calibration = NamedTuple( ((a,T(b)) for (a,b) in pairs(model.calibration) ) ) +dmodel = Dolo.discretize(model) - fun = u->T(u) - P = fun.(model.exogenous.P) - Q = SVector((fun.(e) for e in model.exogenous.Q)...) - vars = Dolo.variables(model.exogenous) - exogenous = Dolo.MarkovChain(vars, P, Q) +model32 = Dolo.convert_precision(Float32,model) +dmodel32 = Dolo.discretize(model32) - Dolo.YModel( - names, - model.states, - model.controls, - exogenous, - calibration - ) -end +wksp = Dolo.time_iteration_workspace(dmodel32) -model32 = convert_model(Float32,model) +itps = wksp.φ.itp -dmodel = Dolo.discretize(model) +(;x0,r0, φ) = wksp + +Dolo.F(dmodel32, x0, φ) -dmodel32 = Dolo.discretize(model32) -wksp = Dolo.time_iteration_workspace(dmodel32) -(;x0,φ) = wksp Dolo.time_iteration(model) \ No newline at end of file diff --git a/misc/test_gpu.jl b/misc/test_gpu.jl index 8866f912..c41c933e 100644 --- a/misc/test_gpu.jl +++ b/misc/test_gpu.jl @@ -233,8 +233,8 @@ function F_cpu(dmodel, x0, φ, K=1000) kernel! = ker_F!(backend) # N = length(dmodel.grid) - N1 = length(dmodel.grid.g1) - N2 = length(dmodel.grid.g2) + N1 = length(dmodel.grid.grids[1]) + N2 = length(dmodel.grid.grids[2]) for k=1:K kernel!(r0, dmodel, x0, φ; ndrange=(N1,N2)) end @@ -262,8 +262,8 @@ function F_gpu(dmodel, x0, φ, K=1000) kernel! = ker_F!(backend) # N = length(dmodel.grid) - N1 = length(dmodel.grid.g1) - N2 = length(dmodel.grid.g2) + N1 = length(dmodel.grid.grids[1]) + N2 = length(dmodel.grid.grids[2]) for k=1:K kernel!(r0_gpu, dmodel, x0_gpu, φ_gpu, ndrange=(N1,N2)) diff --git a/src/Dolo.jl b/src/Dolo.jl index 6e4afd66..75ac7a0d 100755 --- a/src/Dolo.jl +++ b/src/Dolo.jl @@ -73,12 +73,16 @@ module Dolo include("garray.jl") include("model.jl") include("funs.jl") + include("conversion.jl") + include("dev_L2.jl") include("algos/simul.jl") include("algos/results.jl") include("algos/time_iteration.jl") include("algos/time_iteration_accelerated.jl") include("algos/vfi.jl") + + include("utils.jl") # function yaml_import(filename) diff --git a/src/algos/simul.jl b/src/algos/simul.jl index 84130aa1..d31bc495 100644 --- a/src/algos/simul.jl +++ b/src/algos/simul.jl @@ -93,7 +93,7 @@ using ResumableFunctions p = model.calibration.p P = model.transition - Q = model.grid.g1.points + Q = model.grid.grids[1].points i = ss[1][1] @@ -113,7 +113,7 @@ using ResumableFunctions M = Q[j] S = transition(model, m, s, a, M, p) - for (w, i_S) in trembling__hand(model.grid.g2, S) + for (w, i_S) in trembling__hand(model.grid.grids[2], S) res = ( P[i,j]*w, @@ -121,7 +121,7 @@ using ResumableFunctions ( (linear_index ? to__linear_index(model.grid, (j,i_S)) : (j,i_S)), - SVector(M..., model.grid.g2[i_S]...) + SVector(M..., model.grid.grids[2][i_S]...) ) ) if linear_index diff --git a/src/algos/time_iteration.jl b/src/algos/time_iteration.jl index e84a468e..3ecc7159 100644 --- a/src/algos/time_iteration.jl +++ b/src/algos/time_iteration.jl @@ -130,6 +130,8 @@ using LinearMaps function time_iteration_workspace(dmodel; interp_mode=:linear) + T = eltype(dmodel) + x0 = (Dolo.initial_guess(dmodel)) x1 = deepcopy(x0) x2 = deepcopy(x0) @@ -139,7 +141,7 @@ function time_iteration_workspace(dmodel; interp_mode=:linear) n = length(dx.data[1]) J = GArray( dmodel.grid, - zeros(SMatrix{n,n,Float64,n*n}, N) + zeros(SMatrix{n,n,T,n*n}, N) ) vars = variables(dmodel.model.controls) φ = DFun(dmodel.model.states, x0, vars; interp_mode=interp_mode) diff --git a/src/algos/time_iteration_accelerated.jl b/src/algos/time_iteration_accelerated.jl index 67393d82..1f085ef0 100644 --- a/src/algos/time_iteration_accelerated.jl +++ b/src/algos/time_iteration_accelerated.jl @@ -63,15 +63,15 @@ function F!(r, model, x, φ, engine::Union{CPU, GPU}) fun_cpu = FF_(engine) - # p = length(model.grid.g1) - # q = length(model.grid.g2) + # p = length(model.grid.grids[1]) + # q = length(model.grid.grids[2]) # p,q = size(x) if typeof(model.grid)<:CGrid p = model.grid.ranges[1][3] q = model.grid.ranges[2][3] else - p = length(model.grid.g1) - q = length(model.grid.g2) + p = length(model.grid.grids[1]) + q = length(model.grid.grids[2]) end res = fun_cpu(r, model, x, φ; ndrange=(p,q)) @@ -108,8 +108,8 @@ function dF_1!(out, model, controls::GArray, φ::Union{GArray, DFun}, ::CPU) p = model.grid.ranges[1][3] q = model.grid.ranges[2][3] else - p = length(model.grid.g1) - q = length(model.grid.g2) + p = length(model.grid.grids[1]) + q = length(model.grid.grids[2]) end # p,q = size(out) diff --git a/src/conversion.jl b/src/conversion.jl new file mode 100644 index 00000000..10fea021 --- /dev/null +++ b/src/conversion.jl @@ -0,0 +1,39 @@ + +function convert_precision(T, model::Dolo.YModel) + + # convert calibration + calibration = NamedTuple( ((a,T(b)) for (a,b) in pairs(model.calibration) ) ) + + # convert exogenous shock + fun = u->T(u) + P = fun.(model.exogenous.P) + Q = SVector((fun.(e) for e in model.exogenous.Q)...) + + vars = Dolo.variables(model.exogenous) + exogenous = Dolo.MarkovChain(vars, P, Q) + + # convert states and controls spaces + states = convert_precision(T, model.states) + controls = convert_precision(T, model.controls) + + + Dolo.YModel( + names, + states, + controls, + exogenous, + calibration + ) +end + +function convert_precision(Tnew, cspace::Dolo.CartesianSpace{d, vars, Told}) where Told where d where vars + Dolo.CartesianSpace{d, vars, Tnew}(cspace.min, cspace.max) +end + +function convert_precision(Tnew, gspace::Dolo.GridSpace{N,d,dims,Tf}) where N where d where dims where Tf + Dolo.GridSpace{N,d,dims,Tnew}(gspace.points) +end + +function convert_precision(Tnew, ps::Dolo.ProductSpace) + Dolo.ProductSpace((convert_precision(Tnew, e) for e in ps.spaces)...) +end diff --git a/src/dolo_model.jl b/src/dolo_model.jl index 3cb0e7ba..bd76af4a 100644 --- a/src/dolo_model.jl +++ b/src/dolo_model.jl @@ -55,6 +55,10 @@ struct DYModel{M, G, D} <: ADModel dproc::D end +# TODO: check whether true +eltype(dm::DYModel) = eltype(dm.model) + + name(dm::DYModel) = name(dm.model) bounds(dmodel::DYModel, s) = bounds(dmodel.model, s) diff --git a/src/funs.jl b/src/funs.jl index eaa5344f..84d01540 100644 --- a/src/funs.jl +++ b/src/funs.jl @@ -71,8 +71,8 @@ function DFun(states, values::GVector{G,V}, vars=nothing; interp_mode=:linear) w end # TODO: check values.data[i,:] - sz = (e[3] for e in values.grid.g2.ranges) - itps = tuple( (SplineInterpolator(values.grid.g2.ranges; values=reshape(values[i,:], sz...),k=k) for i=1:length(values.grid.g1) )...) + sz = (e[3] for e in values.grid.grids[2].ranges) + itps = tuple( (SplineInterpolator(values.grid.grids[2].ranges; values=reshape(values[i,:], sz...),k=k) for i=1:length(values.grid.grids[1]) )...) if typeof(vars) <: Nothing if eltype(values) <: Number @@ -92,8 +92,8 @@ function fit!(φ::DFun, x::GVector{G}) where G<:PGrid{<:SGrid, <:CGrid} # This is only for SGrid x CGrid - n_m = length(x.grid.g1) - sz = tuple(n_m, (e[3] for e in x.grid.g2.ranges)...) + n_m = length(x.grid.grids[1]) + sz = tuple(n_m, (e[3] for e in x.grid.grids[2].ranges)...) xx = reshape( view(x.data, :), sz...) rr = tuple( (Colon() for i=1:(Base.ndims(xx)-1))... ) for i=1:length( φ.itp) @@ -125,8 +125,8 @@ end function (f::DFun{A,B,I,vars})(loc::Tuple{Tuple{Int64}, SVector{d2, U}}) where A where B<:GArray{G,V} where V where I where G<:PGrid{G1,G2} where G1<:SGrid where G2<:CGrid where vars where d2 where U # TODO: not beautiful x_ = loc[2] - dd1 = ndims(f.values.grid.g1) - dd2 = ndims(f.values.grid.g2) + dd1 = ndims(f.values.grid.grids[1]) + dd2 = ndims(f.values.grid.grids[2]) x = SVector((x_[i] for i=(dd1+1):(dd1+dd2))...) f.itp[loc[1][1]](x) end diff --git a/src/garray.jl b/src/garray.jl index 69ad9c6e..d89b7fe8 100644 --- a/src/garray.jl +++ b/src/garray.jl @@ -22,20 +22,20 @@ distance(u::SVector, v::SVector) = maximum(abs, u-v) distance(u::GVector, v::GVector) = maximum( t->distance(t[1],t[2]), zip(u.data, v.data)) import Base: size -size(a::GArray{PGrid{G1, G2, d}, T}) where G1 where G2 where d where T = (length(a.grid.g1), length(a.grid.g2)) +size(a::GArray{PGrid{G1, G2, d}, T}) where G1 where G2 where d where T = (length(a.grid.grids[1]), length(a.grid.grids[2])) # TODO: check -getindex(a::GArray{PGrid{G1, G2, d}, T}, i::Int, j::Int) where G1 where G2 where d where T = a.data[ i + length(a.grid.g1)*(j-1)] -getindex(a::GArray{PGrid{G1, G2, d}, T}, i::Int, ::Colon) where G1 where G2 where d where T = [a[i,j] for j=1:length(a.grid.g2)] -getindex(a::GArray{PGrid{G1, G2, d}, T}, ::Colon, j::Int) where G1 where G2 where d where T = [a[i,j] for i=1:length(a.grid.g1)] +getindex(a::GArray{PGrid{G1, G2, d}, T}, i::Int, j::Int) where G1 where G2 where d where T = a.data[ i + length(a.grid.grids[1])*(j-1)] +getindex(a::GArray{PGrid{G1, G2, d}, T}, i::Int, ::Colon) where G1 where G2 where d where T = [a[i,j] for j=1:length(a.grid.grids[2])] +getindex(a::GArray{PGrid{G1, G2, d}, T}, ::Colon, j::Int) where G1 where G2 where d where T = [a[i,j] for i=1:length(a.grid.grids[1])] getindex(a::GArray{PGrid{G1, G2, d}, T}, ::Colon) where G1 where G2 where d where T = a.data # TODO: check function setindex!(a::GArray{PGrid{G1, G2, d}, T}, v, i::Int, j::Int) where G1 where G2 where d where T - (a.data[ i + length(a.grid.g1)*(j-1)] = v) + (a.data[ i + length(a.grid.grids[1])*(j-1)] = v) nothing end @@ -72,8 +72,8 @@ getindex(g::GArray{G,T}, inds::Int64) where G<:AGrid{d} where d where T = g.data # interpolating indexing function (xa::GArray{PGrid{G1, G2, d}, T})(i::Int64, p::SVector{d2, U}) where G1<:SGrid where G2<:CGrid{d2} where d where d2 where T where U - g1 = xa.grid.g1 - g2 = xa.grid.g2 + g1 = xa.grid.grids[1] + g2 = xa.grid.grids[2] dims = tuple(length(g1), (e[3] for e in g2.ranges)... ) # ranges = tuple( (range(e...) for e in g2.ranges)... ) # v = view(reshape(xa.data, dims),i,:) @@ -90,7 +90,7 @@ end function (xa::GArray{PGrid{G1, G2, d}, T})(S::Tuple{Tuple{Int64}, U}) where G1 where G2 where U<:SVector where d where T #### TODO: replace - n_x = ndims(xa.grid.g2) + n_x = ndims(xa.grid.grids[2]) V = S[2] n = length(V) s = SVector((V[i] for i=n-n_x+1:n)...) diff --git a/src/grids.jl b/src/grids.jl index 3d033802..3ccec258 100644 --- a/src/grids.jl +++ b/src/grids.jl @@ -54,13 +54,18 @@ end struct ProductGrid{G1, G2, d} <: AGrid{d} - g1::G1 - g2::G2 - # points::Vector{SVector{d, Float64}} + grids::Tuple{G1, G2} end const PGrid = ProductGrid +function ProductGrid(A::AGrid{d1},B::AGrid{d2}) where d1 where d2 + println(d1) + println(d2) + ProductGrid{typeof(A), typeof(B), d1+d2}( (A,B) ) +end + +# functionProductGrid(A,B) = ProductGrid{typeof(A), typeof(B), ndims(A)+ndims(B)}((A,B)) getindex(g::SGrid{d}, ::Colon) where d = g.points getindex(g::SGrid{d}, i::Int) where d = g.points[i] @@ -72,25 +77,25 @@ cover(m,v::SVector{d,T}) where d where T = SVector{d,T}( (v[i] for i=length(m)+1:length(v))... ) -PGrid(g1::SGrid{n, d1}, g2::CGrid{d2}) where n where d1 where d2 = PGrid{typeof(g1), CGrid{d2}, d1+d2}(g1, g2) +# PGrid(g1::SGrid{n, d1}, g2::CGrid{d2}) where n where d1 where d2 = PGrid{typeof(g1), CGrid{d2}, d1+d2}((g1, g2)) cross(g1::SGrid{d1}, g2::CGrid{d2}) where d1 where d2 = PGrid(g1,g2) # another way to define multi-dimension cartesian grids -PGrid(g1::CGrid{d1}, g2::CGrid{d2}) where d1 where d2 = PGrid{typeof(g1), CGrid{d2}, d1+d2}(g1, g2) +# PGrid(g1::CGrid{d1}, g2::CGrid{d2}) where d1 where d2 = PGrid{typeof(g1), CGrid{d2}, d1+d2}(g1, g2) import Base: getindex -from_linear(g::PGrid{G1, G2, d}, n) where G1 where G2 where d = let x=divrem(n-1, length(g.g1)); (x[2]+1, x[1]+1) end +from_linear(g::PGrid{G1, G2, d}, n) where G1 where G2 where d = let x=divrem(n-1, length(g.grids[1])); (x[2]+1, x[1]+1) end getindex(g::PGrid{G1, G2, d}, n::Int) where G1 where G2 where d = getindex(g, from_linear(g, n)...) function getindex(g::PGrid{G1, G2, d}, i::Int64, j::Int64) where G1<:SGrid{d1} where G2<:CGrid{d2} where d where d1 where d2 - SVector{d,Float64}(g.g1[i]..., g.g2[j]...) + SVector{d,Float64}(g.grids[1][i]..., g.grids[2][j]...) end -getindex(g::PGrid{G1, G2, d}, i::Int64, ::Colon) where G1 where G2 where d = g.g2[:] # TODO: should error if i out of bounds -getindex(g::PGrid{G1, G2, d}, ::Colon, i::Int64) where G1 where G2 where d = g.g1[:] +getindex(g::PGrid{G1, G2, d}, i::Int64, ::Colon) where G1 where G2 where d = g.grids[2][:] # TODO: should error if i out of bounds +getindex(g::PGrid{G1, G2, d}, ::Colon, i::Int64) where G1 where G2 where d = g.grids[1][:] @inline to__linear_index(g::CGrid{2}, ind::Tuple{Int64, Int64}) = let i,j = ind @@ -98,7 +103,7 @@ getindex(g::PGrid{G1, G2, d}, ::Colon, i::Int64) where G1 where G2 where d = g.g return i + p*(j-1) end -@inline to__linear_index(g::PGrid, ind::Tuple{Int64, Int64}) = ind[1] + length(g.g1)*(ind[2]-1) +@inline to__linear_index(g::PGrid, ind::Tuple{Int64, Int64}) = ind[1] + length(g.grids[1])*(ind[2]-1) show(io::IO, g::SGrid{d1, d2}) where d1 where d2 = print(io, "SGrid{$(d1)}") @@ -109,7 +114,7 @@ show(io::IO, g::CGrid{d}) where d = let end -show(io::IO, g::PGrid) = println(io, "$(g.g1)×$(g.g2)") +show(io::IO, g::PGrid) = println(io, "$(g.grids[1])×$(g.grids[2])") import Base: iterate import Base: length @@ -117,7 +122,7 @@ import Base: getindex import Base: setindex! -length(pg::PGrid{G1, G2, d}) where G1 where G2 where d = length(pg.g1)*length(pg.g2) +length(pg::PGrid{G1, G2, d}) where G1 where G2 where d = length(pg.grids[1])*length(pg.grids[2]) length(sg::SGrid{d}) where d = length(sg.points) length(cg::CGrid{d}) where d = prod(e[3] for e in cg.ranges) @@ -172,31 +177,31 @@ end function Base.iterate(g::PGrid{G1, G2, d}) where G1 where G2 where d - x = g.g1[1] - y = g.g2[1] + x = g.grids[1][1] + y = g.grids[2][1] return (SVector{d, Float64}(x...,y...),(y,1,1)) end function Base.iterate(g::PGrid{G1,G2,d},state) where G1 where G2 where d y,i,j=state - if i Date: Mon, 20 May 2024 19:43:30 +0200 Subject: [PATCH 05/17] WIP --- misc/dev_float32.jl | 5 +++-- misc/rbc_float32.jl | 41 +++++++++++++++++++++++++++++-------- src/algos/time_iteration.jl | 10 +++++++-- src/funs.jl | 6 +++--- src/garray.jl | 18 ++++++++-------- src/grids.jl | 15 +++++++++----- src/splines/interp.jl | 6 +++--- src/splines/splines.jl | 12 +++++------ 8 files changed, 76 insertions(+), 37 deletions(-) diff --git a/misc/dev_float32.jl b/misc/dev_float32.jl index e7f54566..f074c9d0 100644 --- a/misc/dev_float32.jl +++ b/misc/dev_float32.jl @@ -1,8 +1,9 @@ using Dolo root_dir = pkgdir(Dolo) -model = include("$(root_dir)/examples/ymodels/rbc_mc.jl") +model32 = include("$(root_dir)/misc/rbc_float32.jl") -dm = Dolo.discretize(model, Dict(:endo=>[10000000]) ) + +dm32 = Dolo.discretize(model, Dict(:endo=>[10000000]) ) Dolo.convert \ No newline at end of file diff --git a/misc/rbc_float32.jl b/misc/rbc_float32.jl index 3958bab9..d6beddf4 100644 --- a/misc/rbc_float32.jl +++ b/misc/rbc_float32.jl @@ -56,22 +56,47 @@ model = let end -dmodel = Dolo.discretize(model) +# now these are orphans +model32 = Dolo.convert_precision(Float32,model) -model32 = Dolo.convert_precision(Float32,model) -dmodel32 = Dolo.discretize(model32) +function Dolo.transition(model::typeof(model32), s::NamedTuple, x::NamedTuple, M::NamedTuple) + + (;δ, ρ) = model.calibration + + # Z = e.Z + K = s.k * (1-δ) + x.i -wksp = Dolo.time_iteration_workspace(dmodel32) + (;k=K,) ## This is only the endogenous state -itps = wksp.φ.itp +end -(;x0,r0, φ) = wksp -Dolo.F(dmodel32, x0, φ) +function intermediate(model::typeof(model32),s::NamedTuple, x::NamedTuple) + + p = model.calibration + y = exp(s.z)*(s.k^p.α)*(x.n^(1-p.α)) + w = (1-p.α)*y/x.n + rk = p.α*y/s.k + c = y - x.i + return ( (; y, c, rk, w)) + +end +function arbitrage(model::typeof(model32), s::NamedTuple, x::NamedTuple, S::NamedTuple, X::NamedTuple) + + p = model.calibration + + y = intermediate(model, s, x) + Y = intermediate(model, S, X) + res_1 = p.χ*(x.n^p.η)*(y.c^p.σ) - y.w + res_2 = (p.β*(y.c/Y.c)^p.σ)*(1 - p.δ + Y.rk) - 1 + + return ( (;res_1, res_2) ) + +end -Dolo.time_iteration(model) \ No newline at end of file +model32 \ No newline at end of file diff --git a/src/algos/time_iteration.jl b/src/algos/time_iteration.jl index 3ecc7159..c8ec90ce 100644 --- a/src/algos/time_iteration.jl +++ b/src/algos/time_iteration.jl @@ -195,6 +195,7 @@ function time_iteration(model::DYModel, lam = 0.5 local η_0 = NaN + local η convergence = false iterations = T @@ -246,6 +247,7 @@ function time_iteration(model::DYModel, ε_n = norm(r0) if ε_nmaximum(abs, u), v.data) @@ -45,7 +45,10 @@ end eltype(g::GArray{G,T}) where G where T = eltype(T) # warning: these functions don't copy any data -ravel(g::GArray) = reinterpret(Float64, g.data) +function ravel(g::GArray) + Tf = eltype(g.data[1]) + reinterpret(Tf, g.data) +end unravel(g::GArray, x) = GArray( g.grid, reinterpret(eltype(g), x) @@ -113,22 +116,21 @@ import Base: *, \, +, -, / *(x::Number, A::GArray{G,T}) where G where T = GArray(A.grid, x .* A.data) -*(A::GArray{G,Vector{T}}, x::SVector{q, Float64}) where G where T <:SMatrix{p, q, Float64, n} where p where q where n = GArray(A.grid, [M*x for M in A.data]) -# *(A::GArray{G,Vector{T}}, x::SLArray{Tuple{q}, Float64, 1, q, names}) where G where T <:SMatrix{p, q, Float64, n} where p where q where n where names = A*SVector(x...) +*(A::GArray{G,Vector{T}}, x::SVector{q, Tf}) where G where T <:SMatrix{p, q, Tf, n} where p where q where n where Tf = GArray(A.grid, [M*x for M in A.data]) -*(A::GArray{G,T}, B::AbstractArray{Float64}) where G where T <:SMatrix{p, q, Float64, n} where p where q where n = +*(A::GArray{G,T}, B::AbstractArray{Tf}) where G where T <:SMatrix{p, q, Tf, n} where p where q where n where Tf = ravel( GArray( A.grid, - A.data .* reinterpret(SVector{q, Float64}, B) + A.data .* reinterpret(SVector{q, Tf}, B) ) ) import Base: convert -function Base.convert(::Type{Matrix}, A::GArray{G,Vector{T}}) where G where T <:SMatrix{p, q, Float64, k} where p where q where k +function Base.convert(::Type{Matrix}, A::GArray{G,Vector{T}}) where G where T <:SMatrix{p, q, Tf, k} where p where q where k where Tf N = length(A.data) n0 = N*p n1 = N*q diff --git a/src/grids.jl b/src/grids.jl index 3ccec258..07062c74 100644 --- a/src/grids.jl +++ b/src/grids.jl @@ -3,7 +3,7 @@ abstract type ASGrid{d} <: AGrid{d} end import Base: eltype, iterate, size -eltype(cg::AGrid{d}) where d = SVector{d, Float64} +# eltype(cg::AGrid{d}) where d = SVector{d, Float64} ndims(cg::AGrid{d}) where d = d struct CGrid{d,Tf} <: AGrid{d} @@ -90,7 +90,8 @@ from_linear(g::PGrid{G1, G2, d}, n) where G1 where G2 where d = let x=divrem(n-1 getindex(g::PGrid{G1, G2, d}, n::Int) where G1 where G2 where d = getindex(g, from_linear(g, n)...) function getindex(g::PGrid{G1, G2, d}, i::Int64, j::Int64) where G1<:SGrid{d1} where G2<:CGrid{d2} where d where d1 where d2 - SVector{d,Float64}(g.grids[1][i]..., g.grids[2][j]...) + Tf = eltype(g) + SVector{d,Tf}(g.grids[1][i]..., g.grids[2][j]...) end @@ -177,17 +178,21 @@ end function Base.iterate(g::PGrid{G1, G2, d}) where G1 where G2 where d + T = eltype(g) x = g.grids[1][1] y = g.grids[2][1] - return (SVector{d, Float64}(x...,y...),(y,1,1)) + return (SVector{d, T}(x...,y...),(y,1,1)) end function Base.iterate(g::PGrid{G1,G2,d},state) where G1 where G2 where d + + T = eltype(g) + y,i,j=state if i Date: Sun, 26 May 2024 16:25:49 +0200 Subject: [PATCH 06/17] WIP --- Project.toml | 1 + misc/dev_float32.jl | 92 ++++++- misc/test_gpu.jl | 14 +- src/Dolo.jl | 1 + src/adapt.jl | 36 +++ src/algos/time_iteration.jl | 20 +- src/algos/time_iteration_accelerated.jl | 6 +- src/grids.jl | 16 +- src/splines/csplines.jl | 12 +- src/splines/interp.jl | 3 +- src/splines/interp_misc.jl | 347 ++++++++++++++++++++++++ src/splines/macros.jl | 70 +++-- 12 files changed, 562 insertions(+), 56 deletions(-) create mode 100644 src/adapt.jl create mode 100644 src/splines/interp_misc.jl diff --git a/Project.toml b/Project.toml index 971d1891..fbd43119 100644 --- a/Project.toml +++ b/Project.toml @@ -11,6 +11,7 @@ BlockDiagonals = "0a1fb500-61f7-11e9-3c65-f5ef3456f9f0" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" Continuables = "79afa230-ca09-11e8-120b-5decf7bf5e25" Crayons = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f" +Cthulhu = "f68482b8-f384-11e8-15f7-abe071a5a75f" DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" diff --git a/misc/dev_float32.jl b/misc/dev_float32.jl index f074c9d0..45e3fc08 100644 --- a/misc/dev_float32.jl +++ b/misc/dev_float32.jl @@ -1,9 +1,95 @@ using Dolo root_dir = pkgdir(Dolo) -model32 = include("$(root_dir)/misc/rbc_float32.jl") +model_32 = include("$(root_dir)/misc/rbc_float32.jl") -dm32 = Dolo.discretize(model, Dict(:endo=>[10000000]) ) +model32 = Dolo.convert_precision(Float32, model_32) -Dolo.convert \ No newline at end of file +dm32 = Dolo.discretize(model32, Dict(:endo=>[100]) ) + + +typeof(dm32) + +using Adapt + + +wk0 = Dolo.time_iteration_workspace(dm32) + +import oneAPI: oneArray +import Adapt: adapt_structure + +wk = adapt(oneArray, wk0) +using KernelAbstractions: get_backend + +if typeof(dm32.grid)<:Dolo.CGrid + p = dm32.grid.ranges[1][3] + q = dm32.grid.ranges[2][3] +else + p = length(dm32.grid.grids[1]) + q = length(dm32.grid.grids[2]) +end + +t_e = get_backend(wk.x0) + +using KernelAbstractions +using StaticArrays + +@kernel function ggg(r, dm, x, φ) +# @kernel function fun(dm, r, x) + + + n = @index(Global, Linear) + ind = @index(Global, Cartesian) + (i,j) = ind.I + + + # k = length(dm.grid.grids[1]) + # i_ = (n-1)÷k + # j_ = (n-1) - (i)*κ + # i = i_+1 + # j = j_+1 + + # TODO: special function here + s_ = dm.grid[n] + s = Dolo.QP((i,j), s_) + + xx = x[n] + + # (i,j) = @inline Dolo.from_linear(model.grid, n) + + rr = Dolo.F(dm, s, xx, φ) + + r[n] = rr + +end + + + +fun_ = ggg(t_e) +@time fun_(wk.r0, dm32, wk.x0, wk.φ; ndrange=(p,q)) + + + +@time Dolo.F!(wk0.r0, dm32, wk0.x0, wk0.φ) + +# try + + # res = fun_(wk.r0, dm32, wk.x0, wk.φ; ndrange=(p,q)) +# catch err + # nothing +# end + + + +# Dolo.time_iteration(dm32, wk; engine=:gpu) + + +s_0 = dm32.grid[10] +s0 = Dolo.QP((2,2), s_0) +xx0 = wk0.x0[10] + + +fon(dm32, s0, xx0) = sum(w*S.val for (w,S) in Dolo.τ(dm32, s0, xx0)) + +@code_warntype fon(dm32, s0, xx0) \ No newline at end of file diff --git a/misc/test_gpu.jl b/misc/test_gpu.jl index c41c933e..b9eaa033 100644 --- a/misc/test_gpu.jl +++ b/misc/test_gpu.jl @@ -107,12 +107,6 @@ spl = Dolo.SplineInterpolator{typeof(grid), typeof(C), 3}(grid, C) # end -adapt_structure(to, s::Dolo.SplineInterpolator{G,C,k}) where G where C where k = let - θθ = adapt(to, s.θ) - CC = typeof(θθ) - Dolo.SplineInterpolator{G,CC,k}(s.grid, θθ) -end - gspl = adapt(CuArray, spl) @@ -170,6 +164,14 @@ Dolo.calibrated(model, :controls) import Adapt: adapt_storage + + +adapt_structure(to, s::Dolo.SplineInterpolator{G,C,k}) where G where C where k = let + θθ = adapt(to, s.θ) + CC = typeof(θθ) + Dolo.SplineInterpolator{G,CC,k}(s.grid, θθ) +end + function adapt_structure(to, v::Dolo.GVector{G,V}) where G where V data = adapt(to, v.data) Dolo.GVector{G, typeof(data)}( diff --git a/src/Dolo.jl b/src/Dolo.jl index 75ac7a0d..b460bdc0 100755 --- a/src/Dolo.jl +++ b/src/Dolo.jl @@ -83,6 +83,7 @@ module Dolo include("algos/vfi.jl") + include("adapt.jl") include("utils.jl") # function yaml_import(filename) diff --git a/src/adapt.jl b/src/adapt.jl new file mode 100644 index 00000000..10362ef8 --- /dev/null +++ b/src/adapt.jl @@ -0,0 +1,36 @@ +import Adapt: adapt, adapt_structure + +adapt_structure(to, s::SplineInterpolator{G,C,k}) where G where C where k = let + θθ = adapt(to, s.θ) + CC = typeof(θθ) + SplineInterpolator{G,CC,k}(s.grid, θθ) +end + +function adapt_structure(to, v::GVector{G,V}) where G where V + data = adapt(to, v.data) + GVector{G, typeof(data)}( + v.grid, + data + ) +end + +function adapt_structure(to, v::GArray{G,V}) where G where V + data = adapt(to, v.data) + GArray( + v.grid, + data + ) +end +function adapt_structure(to, f::DFun{Dom, Gar, Itp, vars}) where Dom where Gar where Itp where vars + itp = adapt(to, f.itp) + values = adapt(to, f.values) + DFun{Dom, typeof(values), typeof(itp), vars}( + f.domain, + values, + itp + ) +end + +import KernelAbstractions: get_backend + +get_backend(g::GArray) = get_backend(g.data) diff --git a/src/algos/time_iteration.jl b/src/algos/time_iteration.jl index c8ec90ce..f96a18bf 100644 --- a/src/algos/time_iteration.jl +++ b/src/algos/time_iteration.jl @@ -126,9 +126,9 @@ end using LinearMaps +using Adapt - -function time_iteration_workspace(dmodel; interp_mode=:linear) +function time_iteration_workspace(dmodel; interp_mode=:linear, dest=Array) T = eltype(dmodel) @@ -145,7 +145,10 @@ function time_iteration_workspace(dmodel; interp_mode=:linear) ) vars = variables(dmodel.model.controls) φ = DFun(dmodel.model.states, x0, vars; interp_mode=interp_mode) - return (;x0, x1, x2, r0, dx, J, φ) + + tt = (;x0, x1, x2, r0, dx, J, φ) + + return adapt(dest, tt) end @@ -199,16 +202,14 @@ function time_iteration(model::DYModel, convergence = false iterations = T - if engine==:cpu - t_engine = CPU() - elseif engine==:gpu - t_engine = GPU() + (;x0, x1, x2, dx, r0, J, φ) = workspace + + if engine in (:cpu, :gpu) + t_engine = get_backend(workspace.x0) else t_engine = nothing end - (;x0, x1, x2, dx, r0, J, φ) = workspace - ti_trace = trace ? IterationTrace(typeof(φ)[]) : nothing if improve @@ -224,7 +225,6 @@ function time_iteration(model::DYModel, trace && push!(ti_trace.data, deepcopy(φ)) F!(r0, model, x0, φ, t_engine) - # r0 = F(model, x0, φ) ε = norm(r0) diff --git a/src/algos/time_iteration_accelerated.jl b/src/algos/time_iteration_accelerated.jl index 1f085ef0..a4ca41f4 100644 --- a/src/algos/time_iteration_accelerated.jl +++ b/src/algos/time_iteration_accelerated.jl @@ -37,14 +37,12 @@ using KernelAbstractions # end # This is for a PGrid model only -function F!(r, model, x, φ, engine::Union{CPU, GPU}) +function F!(r, model, x, φ, engine) @kernel function FF_(r, @Const(model), @Const(x), @Const(φ)) - # c = @index(Global, Cartesian) - # c = @index(Global, Cartesian) + n = @index(Global, Linear) - # i,j = c.I (i,j) = Dolo.from_linear(model.grid, n) s_ = model.grid[n] diff --git a/src/grids.jl b/src/grids.jl index 07062c74..c4062159 100644 --- a/src/grids.jl +++ b/src/grids.jl @@ -15,7 +15,7 @@ size(cg::CGrid{d}) where d = tuple((e[3] for e in cg.ranges)... ) getindex(g::CGrid{d}, inds::Vararg{Int64,d}) where d = SVector{d}( ( - ( g.ranges[i][1] + (g.ranges[i][2]-g.ranges[i][1])*( (inds[i]-1)/(g.ranges[i][3]-1)) ) + ( g.ranges[i][1] + (g.ranges[i][2]-g.ranges[i][1])*( (inds[i]-1)/(g.ranges[i][3]-convert(eltype(eltype(g)),1))) ) for i=1:d ) ) @@ -85,13 +85,17 @@ cross(g1::SGrid{d1}, g2::CGrid{d2}) where d1 where d2 = PGrid(g1,g2) import Base: getindex -from_linear(g::PGrid{G1, G2, d}, n) where G1 where G2 where d = let x=divrem(n-1, length(g.grids[1])); (x[2]+1, x[1]+1) end +@inline from_linear(g::PGrid{G1, G2, d}, n::Int) where G1 where G2 where d = let x=divrem(n-1, length(g.grids[1])); (x[2]+1, x[1]+1) end -getindex(g::PGrid{G1, G2, d}, n::Int) where G1 where G2 where d = getindex(g, from_linear(g, n)...) +@inline getindex(g::PGrid{G1, G2, d}, n::Int) where G1 where G2 where d = getindex(g, from_linear(g, n)...) -function getindex(g::PGrid{G1, G2, d}, i::Int64, j::Int64) where G1<:SGrid{d1} where G2<:CGrid{d2} where d where d1 where d2 - Tf = eltype(g) - SVector{d,Tf}(g.grids[1][i]..., g.grids[2][j]...) +# eltype(cg::CGrid{d,Tf}) where d where Tf = SVector{d, Tf} + + +@inline function getindex(g::PGrid{G1, G2, d}, i::Int64, j::Int64) where G1<:SGrid{d1} where G2<:CGrid{d2} where d where d1 where d2 + # Tf = eltype(g) + # SVector{d,Tf}(g.grids[1][i]..., g.grids[2][j]...) + SVector(g.grids[1][i]..., g.grids[2][j]...) end diff --git a/src/splines/csplines.jl b/src/splines/csplines.jl index 75a40a91..cdcf0ae3 100644 --- a/src/splines/csplines.jl +++ b/src/splines/csplines.jl @@ -4,21 +4,21 @@ using StaticArrays # old style call -function eval_UC_spline(a, b, orders, C::AbstractArray{Float64}, S::Matrix{Float64}) +function eval_UC_spline(a, b, orders, C::AbstractArray{Tf}, S::Matrix{Tf}) where Tf d,N = size(S) n_x = size(C,1) dims = tuple(size(C)[2:end]...) out = zeros(n_x,N) - CC = reshape(reinterpret(SVector{n_x,Float64},vec(C)),dims) - SS = reshape(reinterpret(SVector{d,Float64},vec(S)),(N,)) - V = reshape(reinterpret(SVector{n_x,Float64},vec(out)),(N,)) + CC = reshape(reinterpret(SVector{n_x,Tf},vec(C)),dims) + SS = reshape(reinterpret(SVector{d,Tf},vec(S)),(N,)) + V = reshape(reinterpret(SVector{n_x,Tf},vec(out)),(N,)) eval_UC_spline!(a, b, orders, CC, SS, V) return out end -function eval_UC_spline(a, b, orders, C::AbstractArray{T, d}, S::Vector{SVector{d,Float64}}) where T where d +function eval_UC_spline(a, b, orders, C::AbstractArray{T, d}, S::Vector{SVector{d,Tf}}) where T where d where Tf N = length(S) vals = zeros(T,N) eval_UC_spline!(a, b, orders, C, S, vals) @@ -26,7 +26,7 @@ function eval_UC_spline(a, b, orders, C::AbstractArray{T, d}, S::Vector{SVector end -@generated function eval_UC_spline!(a, b, orders, C::AbstractArray{T, d}, S::AbstractVector{SVector{d, Float64}}, V::AbstractVector{T}) where d where T +@generated function eval_UC_spline!(a, b, orders, C::AbstractArray{T, d}, S::AbstractVector{SVector{d,Tf}}, V::AbstractVector{T}) where d where T where Tf # d = C.parameters[2]-1 # first dimension of C indexes the splines # the speed penalty of extrapolating points when iterating over point # seems very small so this is the default diff --git a/src/splines/interp.jl b/src/splines/interp.jl index 9b2abfb3..c68a8f1d 100644 --- a/src/splines/interp.jl +++ b/src/splines/interp.jl @@ -58,7 +58,8 @@ function interp(ranges::NTuple{d, Tuple{Tf, Tf, Int64}}, values::AbstractArray{T λ = (x.-(a .+ δ.*i))./δ - i_ = floor.(Int, i) .+ 1 + # i_ = floor.(Int, i) .+ 1 + i_ = unsafe_trunc.(Int, i) .+ 1 M = matextract(values, i_...) diff --git a/src/splines/interp_misc.jl b/src/splines/interp_misc.jl new file mode 100644 index 00000000..8339b170 --- /dev/null +++ b/src/splines/interp_misc.jl @@ -0,0 +1,347 @@ +using StaticArrays +import LoopVectorization: VectorizationBase +import Base: getindex + +getindex(A::Vector{Float64}, i::VectorizationBase.Vec{4,Int64}) = VectorizationBase.Vec{4, Float64}(A[i(1)], A[i(2)], A[i(3)], A[i(4)]) + + +# ## TODO : rewrite the following +# it is currently slower than the custom versions below + +@inline function reduce_tensors(λ::SVector{d1,U}, v::SArray{T,V,d2,W}) where d1 where d2 where T where U where V where W + vv = tuple( (SVector(1-e, e) for e in λ)...) + xx = SVector( (prod(e) for e in Iterators.product(vv...))...) + return sum( xx .* v[:]) +end + +matextract(v::AbstractArray{T,3}, i,j,k) where T = SArray{Tuple{2, 2, 2}, T, 3, 8}( + v[i, j ,k], + v[i+1,j ,k], + v[i ,j+1,k], + v[i+1,j+1,k], + v[i ,j ,k+1], + v[i+1,j ,k+1], + v[i ,j+1,k+1], + v[i+1,j+1,k+1] +) + +matextract(v::AbstractArray{T,2}, i, j) where T = SArray{Tuple{2,2}, T, 2, 4}( + v[i,j], + v[i+1,j], + v[i,j+1], + v[i+1,j+1] +) +matextract(v::AbstractArray{T,1}, i) where T = SArray{Tuple{2}, T, 1, 2}( + v[i], + v[i+1] +) + +function interp(ranges::NTuple{d, Tuple{Float64, Float64, Int64}}, values::AbstractArray{T,d}, x::SVector{d, U}) where d where T where U + + a = SVector( (e[1] for e in ranges)... ) + b = SVector( (e[2] for e in ranges)... ) + n = SVector( (e[3] for e in ranges)... ) + + δ = (b.-a)./(n.-1) + + i = div.( (x.-a), δ) + + i = max.(min.(i, n.-2), 0) + + λ = (x.-(a .+ δ.*i))./δ + + i_ = floor.(Int, i) .+ 1 + + # inds = tuple( (SVector(e,e+1) for e in i_)...) + # return reduce_tensors( λ, values[inds...] ) + + return reduce_tensors( λ, matextract(values, i_...) ) + +end + +function interp(ranges::NTuple{d, Tuple{Float64, Float64, Int64}}, values::AbstractArray{T,d}, x::Vararg{U}) where d where T where U + xx = SVector(x...) + interp(ranges, values, xx) +end + +# @inline function reduce_tensors(λ::SVector{3,U}, v::SArray{T,Float64,3,V}) where d1 where d2 where T where U where V +# (1.0 - λ[3]) * ( +# (1.0 - λ[2])*( +# (1.0 - λ[1])*v[1,1,1] + λ[1]*v[2,1,1] +# ) + λ[2] * ( +# (1.0 - λ[1])*v[1,2,1] + λ[1]*v[2,2,1] +# ) +# ) +# + λ[3] * ( +# (1.0 - λ[2])*( +# (1.0 - λ[1])*v[1,1,2] + λ[1]*v[2,1,2] +# ) + λ[2] * ( +# (1.0 - λ[1])*v[1,2,2] + λ[1]*v[2,2,2] +# ) +# ) +# end + + + + +# @inline function interp(ranges::Tuple{Tuple{Float64, Float64, Int64}}, values::AbstractArray{T}, x::U) where T where U +# # function interp(ranges::Tuple{Tuple{Float64, Float64, Int64}}, values, x) where T where U + +# a = (ranges[1][1]) +# b = (ranges[1][2]) +# n = (ranges[1][3]) + +# δ = (b-a)/(n-1) + +# i = div( (x-a), δ) + +# i = max(min(i, n-2), 0) + +# λ = (x-(a + δ*i))/δ + +# i_ = floor(Int, i) + 1 + +# v0 = values[i_] +# v1 = values[i_+1] + +# (1.0 - λ)*v0 + λ*v1 + +# end + +# @inline function interp(ranges::Tuple{Tuple{Float64, Float64, Int64},Tuple{Float64, Float64, Int64}}, values::AbstractArray{T}, x_1::U, x_2::U) where T where U +# # function interp(ranges::Tuple{Tuple{Float64, Float64, Int64}}, values, x) where T where U + +# a_1 = (ranges[1][1]) +# b_1 = (ranges[1][2]) +# n_1 = (ranges[1][3]) + +# a_2 = (ranges[2][1]) +# b_2 = (ranges[2][2]) +# n_2 = (ranges[2][3]) + +# δ_1 = (b_1-a_1)/(n_1-1) +# δ_2 = (b_2-a_2)/(n_2-1) + +# i_1 = div( (x_1-a_1), δ_1) +# i_2 = div( (x_2-a_2), δ_2) + +# i_1 = max(min(i_1, n_1-2), 0) +# i_2 = max(min(i_2, n_2-2), 0) + +# λ_1 = (x_1-(a_1 + δ_1*i_1))/δ_1 +# λ_2 = (x_2-(a_2 + δ_2*i_2))/δ_2 + +# i_1_ = floor(Int, i_1) + 1 +# i_2_ = floor(Int, i_2) + 1 + +# v00 = values[i_1_, i_2_] +# v01 = values[i_1_, i_2_+1] +# v10 = values[i_1_+1, i_2_] +# v11 = values[i_1_+1, i_2_+1] + +# res = (1.0 - λ_2)*( +# (1.0 - λ_1)*v00 + λ_1*v10 +# ) + λ_2 * ( +# (1.0 - λ_1)*v01 + λ_1*v11 +# ) + +# return res + +# end + + +# @inline function interp(ranges::Tuple{ +# Tuple{Float64, Float64, Int64}, +# Tuple{Float64, Float64, Int64}, +# Tuple{Float64, Float64, Int64} +# }, values::AbstractArray{T}, x_1::U, x_2::U, x_3::U) where T where U + +# a_1 = (ranges[1][1]) +# b_1 = (ranges[1][2]) +# n_1 = (ranges[1][3]) + +# a_2 = (ranges[2][1]) +# b_2 = (ranges[2][2]) +# n_2 = (ranges[2][3]) + +# a_3 = (ranges[3][1]) +# b_3 = (ranges[3][2]) +# n_3 = (ranges[3][3]) + +# δ_1 = (b_1-a_1)/(n_1-1) +# δ_2 = (b_2-a_2)/(n_2-1) +# δ_3 = (b_3-a_3)/(n_3-1) + +# i_1 = div( (x_1-a_1), δ_1) +# i_2 = div( (x_2-a_2), δ_2) +# i_3 = div( (x_3-a_3), δ_3) + + +# i_1 = max(min(i_1, n_1-2), 0) +# i_2 = max(min(i_2, n_2-2), 0) +# i_3 = max(min(i_3, n_3-2), 0) + +# λ_1 = (x_1-(a_1 + δ_1*i_1))/δ_1 +# λ_2 = (x_2-(a_2 + δ_2*i_2))/δ_2 +# λ_3 = (x_3-(a_3 + δ_3*i_3))/δ_3 + +# i_1_ = floor(Int, i_1) + 1 +# i_2_ = floor(Int, i_2) + 1 +# i_3_ = floor(Int, i_3) + 1 + +# v000 = values[i_1_, i_2_, i_3_] +# v001 = values[i_1_, i_2_, i_3_+1] +# v010 = values[i_1_, i_2_+1, i_3_] +# v011 = values[i_1_, i_2_+1, i_3_+1] +# v100 = values[i_1_+1, i_2_, i_3_] +# v101 = values[i_1_+1, i_2_, i_3_+1] +# v110 = values[i_1_+1, i_2_+1, i_3_] +# v111 = values[i_1_+1, i_2_+1, i_3_+1] + + +# res = (1.0 - λ_3) * ( +# (1.0 - λ_2)*( +# (1.0 - λ_1)*v000 + λ_1*v100 +# ) + λ_2 * ( +# (1.0 - λ_1)*v010 + λ_1*v110 +# ) +# ) +# + λ_3 * ( +# (1.0 - λ_2)*( +# (1.0 - λ_1)*v001 + λ_1*v101 +# ) + λ_2 * ( +# (1.0 - λ_1)*v011 + λ_1*v111 +# ) +# ) + +# return res + +# end + + +function vecinterp_1(ranges, values::Vector{T}, x::Vector{<:Number}) where T + N = length(x) + out = zeros(T,N) + for n=1:N + out[n] = interp(ranges, values, x[n]) + end + return out +end + + +function vecinterp_2(ranges, values::Vector{T}, x::Vector{<:Number}) where T + N = length(x) + out = zeros(T,N) + out .= (u->interp(ranges, values, u)).(x) + return out +end + +using LoopVectorization + +function vecinterp_3(ranges, values::Vector{T}, x::Vector{<:Number}) where T + N = length(x) + out = zeros(T,N) + @simd for n=1:N + out[n] = interp(ranges, values, x[n]) + end + return out +end + + +using LoopVectorization + +function vecinterp_4(ranges, values::Vector{T}, X::Vector{<:Number}) where T + + K = length(X) + out = zeros(T,K) + + a = (ranges[1][1]) + b = (ranges[1][2]) + N = (ranges[1][3]) + + δ = (b-a)/(N-1) + + @turbo for n=1:K + + x = X[n] + + i = div( (x-a), δ) + + i = max(min(i, N-2), 0) + + λ = (x-(a + δ*i))/δ + + i_ = floor(Int, i) + 1 + + v0 = values[i_] + v1 = values[i_+1] + # v0 = getindex(values, i_) + # v1 = getindex(values, i_+1) + + out[n] = (1-λ)*v0 + λ*v1 + end + return out +end + +function vecinterp_5(ranges, values::Vector{T}, x::Vector{<:Number}) where T + N = length(x) + out = zeros(T,N) + @turbo for n=1:N + ret = interp(ranges, values, x[n]) + out[n] = ret + end + return out +end + + + + + +# @code_warntype interp(ranges, values, 2.0) + +# xvec = [range(-0.1, 2.1; length=1000)...] +# tvec = xvec .^ 2 + +# yvec = [(interp(ranges, values, x)) for x in xvec] + + +# using ForwardDiff + +# ForwardDiff.jacobian(u->[interp(ranges, values, u[1])], [0.3]) + +# using Plots + +# plot(xvec, tvec) +# plot!(xvec, yvec) + +# using LoopVectorization: VectorizationBase + + +# a,b,N = ranges[1] +# x = VectorizationBase.Vec{4, Float64}(4, 3, 2, 5) +# δ = (b-a)/(N-1) + +# i = div( (x-a), δ) + +# i = max(min(i, N-2), 0) + +# λ = (x-(a + δ*i))/δ + +# i_ = floor(Int, i) + 1 + +# v0 = values[i_] +# v1 = values[i_+1] + +# out[n] = (1-λ)*v0 + λ*v1 + + + +# import Base: getindex +# getindex(A::Vector{T}, i::VectorizationBase.Vec{4,Int64}) where T where d = VectorizationBase.Vec{4, T}(A[i(1)], A[i(2)], A[i(3)], A[i(4)]) + +# getindex(values, i_) + + +# values[i_] + +# i_(1) \ No newline at end of file diff --git a/src/splines/macros.jl b/src/splines/macros.jl index e0955eea..2629ae3f 100644 --- a/src/splines/macros.jl +++ b/src/splines/macros.jl @@ -21,31 +21,59 @@ using StaticArrays # 0.0 0.0 1.0 0.0 # ] -const Ad = [ +const Ad64 = [ [-1.0/6.0 3.0/6.0 -3.0/6.0 1.0/6.0]; [ 3.0/6.0 -6.0/6.0 0.0/6.0 4.0/6.0]; [-3.0/6.0 3.0/6.0 3.0/6.0 1.0/6.0]; [ 1.0/6.0 0.0/6.0 0.0/6.0 0.0/6.0] ] -const dAd = [ +const dAd64 = [ [ 0.0 -0.5 1.0 -0.5]; [ 0.0 1.5 -2.0 0.0]; [ 0.0 -1.5 1.0 0.5]; [ 0.0 0.5 0.0 0.0] ] -const d2Ad = [ +const d2Ad64 = [ [ 0.0 0.0 -1.0 1.0]; [ 0.0 0.0 3.0 -2.0]; [ 0.0 0.0 -3.0 1.0]; [ 0.0 0.0 1.0 0.0] ] +const Ad32 = [ + [-1.0f0/6.0f0 3.0f0/6.0f0 -3.0f0/6.0f0 1.0f0/6.0f0]; + [ 3.0f0/6.0f0 -6.0f0/6.0f0 0.0f0/6.0f0 4.0f0/6.0f0]; + [-3.0f0/6.0f0 3.0f0/6.0f0 3.0f0/6.0f0 1.0f0/6.0f0]; + [ 1.0f0/6.0f0 0.0f0/6.0f0 0.0f0/6.0f0 0.0f0/6.0f0] +] + +const dAd32 = [ + [ 0.0f0 -0.5f0 1.0f0 -0.5f0]; + [ 0.0f0 1.5f0 -2.0f0 0.0f0]; + [ 0.0f0 -1.5f0 1.0f0 0.5f0]; + [ 0.0f0 0.5f0 0.0f0 0.0f0] +] + +const d2Ad32 = [ + [ 0.0f0 0.0f0 -1.0f0 1.0f0]; + [ 0.0f0 0.0f0 3.0f0 -2.0f0]; + [ 0.0f0 0.0f0 -3.0f0 1.0f0]; + [ 0.0f0 0.0f0 1.0f0 0.0f0] +] + U(s,i) = Symbol(string(s,i)) U(s,i,j) = Symbol(string(s,i,"_",j)) -function create_Phi(d, extrap, diff) +function create_Phi(d, extrap, diff; Tf=Float64) + if Tf==Float64 + Ad = Ad64 + dAd = dAd64 + elseif Tf==Float32 + Ad = Ad32 + dAd = dAd32 + end lines = [] for i=1:d block = [] @@ -113,13 +141,13 @@ tensor_prod(["Phi_1", "Phi_2"], Int64[]) # Phi_1_1 * (Phi_2_1 * C[i1 + 1,i2 + tensor_prod(["Phi_1", "dPhi_2"], Int64[]) # Phi_1_1 * (dPhi_2_1 * C[i1 + 1,i2 + 1] + dPhi_2_2 * C[i1 + 1,i2 + 2] + dPhi_2_3 * C[i1 + 1,i2 + 3] + dPhi_2_4 * C[i1 + 1,i2 + 4]) + Phi_1_2 * (dPhi_2_1 * C[i1 + 2,i2 + 1] + dPhi_2_2 * C[i1 + 2,i2 + 2] + dPhi_2_3 * C[i1 + 2,i2 + 3] + dPhi_2_4 * C[i1 + 2,i2 + 4]) + Phi_1_3 * (dPhi_2_1 * C[i1 + 3,i2 + 1] + dPhi_2_2 * C[i1 + 3,i2 + 2] + dPhi_2_3 * C[i1 + 3,i2 + 3] + dPhi_2_4 * C[i1 + 3,i2 + 4]) + Phi_1_4 * (dPhi_2_1 * C[i1 + 4,i2 + 1] + dPhi_2_2 * C[i1 + 4,i2 + 2] + dPhi_2_3 * C[i1 + 4,i2 + 3] + dPhi_2_4 * C[i1 + 4,i2 + 4]) -function create_parameters(d) +function create_parameters(d; Tf=Float64) lines = [] for i=1:d block = quote $(U("M",i)) = orders[$i] $(U("start",i)) = a[$i] - $(U("dinv",i)) = (orders[$i]-1.0)/(b[$i]-a[$i]) + $(U("dinv",i)) = (orders[$i]-convert($Tf,1.0))/(b[$i]-a[$i]) end for ll in block.args push!(lines, ll) @@ -128,20 +156,21 @@ function create_parameters(d) return lines end -function create_local_parameters(d; vectorize=true) +function create_local_parameters(d; vectorize=true, Tf=Float64) lines = [] if vectorize for i=1:d bl = quote $(U("x",i)) = S[n][$i] $(U("u",i)) = ($(U("x",i)) - $(U("start",i)))*$(U("dinv",i)) - $(U("i",i)) = (floor(Int,$(U("u",i)) )) + # $(U("i",i)) = (floor(Int,$(U("u",i)) )) + $(U("i",i)) = unsafe_trunc(Int32, $(U("u",i))) $(U("i",i)) = max( min($(U("i",i)),$(U("M",i))-2), 0 ) $(U("t",i)) = $(U("u",i))-$(U("i",i)) $(U("tp",i,1)) = $(U("t",i))*$(U("t",i))*$(U("t",i)) $(U("tp",i,2)) = $(U("t",i))*$(U("t",i)) $(U("tp",i,3)) = $(U("t",i)) - $(U("tp",i,4)) = 1.0; + $(U("tp",i,4)) = convert($Tf,1.0); #TODO end for ll in bl.args push!(lines, ll) @@ -153,13 +182,14 @@ function create_local_parameters(d; vectorize=true) bl = quote $(U("x",i)) = S[$i] $(U("u",i)) = ($(U("x",i)) - $(U("start",i)))*$(U("dinv",i)) - $(U("i",i)) = (floor(Int,$(U("u",i)) )) + # $(U("i",i)) = (floor(Int,$(U("u",i)) )) + $(U("i",i)) = unsafe_trunc(Int32, $(U("u",i)) ) $(U("i",i)) = max( min($(U("i",i)),$(U("M",i))-2), 0 ) $(U("t",i)) = $(U("u",i))-$(U("i",i)) $(U("tp",i,1)) = $(U("t",i))*$(U("t",i))*$(U("t",i)) $(U("tp",i,2)) = $(U("t",i))*$(U("t",i)) $(U("tp",i,3)) = $(U("t",i)) - $(U("tp",i,4)) = 1.0; + $(U("tp",i,4)) = convert($Tf,1.0); end for ll in bl.args push!(lines, ll) @@ -169,17 +199,17 @@ function create_local_parameters(d; vectorize=true) return lines end -function create_function(d,extrap="natural"; vectorize=true) +function create_function(d,extrap="natural"; vectorize=true, Tf=Float32) if vectorize expr = quote - function $(Symbol(string("eval_UC_spline!")))( a, b, orders, C::Array{T,d}, S::Vector{SVector{d,Float64}}, V::Vector{T}) where d where T - $(create_parameters(d)...) + function $(Symbol(string("eval_UC_spline!")))( a, b, orders, C::Array{T,d}, S::Vector{SVector{d,$Tf}}, V::Vector{T}) where d where T + $(create_parameters(d; Tf=Tf)...) N = size(S,1) # @fastmath @inbounds @simd( # doesn't seem to make any difference for n=1:N - $(create_local_parameters(d)...) - $(create_Phi(d,extrap,false)...) - V[n] = $( tensor_prod([string("Phi_",i) for i=1:d], Int64[]) ) + $(create_local_parameters(d;Tf=Tf)...) + $(create_Phi(d,extrap,false;Tf=Tf)...) + V[n] = $( tensor_prod([string("Phi_",i) for i=1:d], Int64[]; Tf=Tf) ) end # ) end @@ -187,11 +217,11 @@ function create_function(d,extrap="natural"; vectorize=true) else expr = quote function $(Symbol(string("eval_UC_spline")))( a, b, orders, C::Array{T,d}, S::SVector{d,U}) where d where T where U - $(create_parameters(d)...) + $(create_parameters(d,Tf=Tf)...) N = size(S,1) # @fastmath @inbounds @simd( # doesn't seem to make any difference - $(create_local_parameters(d; vectorize=false)...) - $(create_Phi(d,extrap,false)...) + $(create_local_parameters(d; vectorize=false,Tf=Tf)...) + $(create_Phi(d,extrap,false; Tf=Tf)...) V = $( tensor_prod([string("Phi_",i) for i=1:d], Int64[]) ) # ) return V From 31a6252219e2e2d5627ff2d749a4c9323cdf9b25 Mon Sep 17 00:00:00 2001 From: Pablo Winant Date: Sun, 26 May 2024 23:19:43 +0200 Subject: [PATCH 07/17] WIP --- misc/dev_float32.jl | 25 ++++++++++++++++++------- misc/rbc_float32.jl | 8 ++++---- src/algos/time_iteration.jl | 6 +++++- 3 files changed, 27 insertions(+), 12 deletions(-) diff --git a/misc/dev_float32.jl b/misc/dev_float32.jl index 45e3fc08..1269e4c1 100644 --- a/misc/dev_float32.jl +++ b/misc/dev_float32.jl @@ -6,20 +6,24 @@ model_32 = include("$(root_dir)/misc/rbc_float32.jl") model32 = Dolo.convert_precision(Float32, model_32) -dm32 = Dolo.discretize(model32, Dict(:endo=>[100]) ) +dm32 = Dolo.discretize(model32, Dict(:endo=>[1000]) ) typeof(dm32) using Adapt +import oneAPI: oneArray +import Adapt: adapt_structure +import CUDA: CuArray + wk0 = Dolo.time_iteration_workspace(dm32) -import oneAPI: oneArray -import Adapt: adapt_structure +wk = Dolo.time_iteration_workspace(dm32, dest=CuArray) -wk = adapt(oneArray, wk0) +# wk = adapt(oneArray, wk0) +wk = adapt(CuArray, wk0) using KernelAbstractions: get_backend if typeof(dm32.grid)<:Dolo.CGrid @@ -64,14 +68,21 @@ using StaticArrays end - +using BenchmarkTools fun_ = ggg(t_e) + @time fun_(wk.r0, dm32, wk.x0, wk.φ; ndrange=(p,q)) +@benchmark fun_(wk.r0, dm32, wk.x0, wk.φ; ndrange=(p,q)) + + +@benchmark Dolo.F!(wk0.r0, dm32, wk0.x0, wk0.φ) + + +Dolo.distance(wk0.r0, adapt(Array,wk.r0)) -@time Dolo.F!(wk0.r0, dm32, wk0.x0, wk0.φ) # try @@ -82,7 +93,7 @@ fun_ = ggg(t_e) -# Dolo.time_iteration(dm32, wk; engine=:gpu) +Dolo.time_iteration(dm32, wk; engine=:gpu) s_0 = dm32.grid[10] diff --git a/misc/rbc_float32.jl b/misc/rbc_float32.jl index d6beddf4..b6b65fb6 100644 --- a/misc/rbc_float32.jl +++ b/misc/rbc_float32.jl @@ -65,7 +65,7 @@ function Dolo.transition(model::typeof(model32), s::NamedTuple, x::NamedTuple, M (;δ, ρ) = model.calibration # Z = e.Z - K = s.k * (1-δ) + x.i + K = s.k * (1f0-δ) + x.i (;k=K,) ## This is only the endogenous state @@ -77,8 +77,8 @@ function intermediate(model::typeof(model32),s::NamedTuple, x::NamedTuple) p = model.calibration - y = exp(s.z)*(s.k^p.α)*(x.n^(1-p.α)) - w = (1-p.α)*y/x.n + y = exp(s.z)*(s.k^p.α)*(x.n^(1f0-p.α)) + w = (1f0-p.α)*y/x.n rk = p.α*y/s.k c = y - x.i return ( (; y, c, rk, w)) @@ -93,7 +93,7 @@ function arbitrage(model::typeof(model32), s::NamedTuple, x::NamedTuple, S::Name y = intermediate(model, s, x) Y = intermediate(model, S, X) res_1 = p.χ*(x.n^p.η)*(y.c^p.σ) - y.w - res_2 = (p.β*(y.c/Y.c)^p.σ)*(1 - p.δ + Y.rk) - 1 + res_2 = (p.β*(y.c/Y.c)^p.σ)*(1f0 - p.δ + Y.rk) - 1f0 return ( (;res_1, res_2) ) diff --git a/src/algos/time_iteration.jl b/src/algos/time_iteration.jl index f96a18bf..95a6334c 100644 --- a/src/algos/time_iteration.jl +++ b/src/algos/time_iteration.jl @@ -148,7 +148,11 @@ function time_iteration_workspace(dmodel; interp_mode=:linear, dest=Array) tt = (;x0, x1, x2, r0, dx, J, φ) - return adapt(dest, tt) + res = NamedTuple( + ( (k=>adapt(dest, v)) for (k,v) in zip(keys(tt),values(tt)) ) + ) + return res + # return adapt(dest, tt) end From f2f67540b0ca973e76befb3b0c766c09305c1f4a Mon Sep 17 00:00:00 2001 From: Pablo Winant Date: Mon, 27 May 2024 09:18:24 +0200 Subject: [PATCH 08/17] Almost there ! --- misc/dev_float32.jl | 62 +++------------------- src/adapt.jl | 3 ++ src/algos/time_iteration_accelerated.jl | 69 +++++++++++++++---------- 3 files changed, 53 insertions(+), 81 deletions(-) diff --git a/misc/dev_float32.jl b/misc/dev_float32.jl index 1269e4c1..24be933e 100644 --- a/misc/dev_float32.jl +++ b/misc/dev_float32.jl @@ -6,7 +6,7 @@ model_32 = include("$(root_dir)/misc/rbc_float32.jl") model32 = Dolo.convert_precision(Float32, model_32) -dm32 = Dolo.discretize(model32, Dict(:endo=>[1000]) ) +dm32 = Dolo.discretize(model32, Dict(:endo=>[100000]) ) typeof(dm32) @@ -23,6 +23,7 @@ wk0 = Dolo.time_iteration_workspace(dm32) wk = Dolo.time_iteration_workspace(dm32, dest=CuArray) # wk = adapt(oneArray, wk0) + wk = adapt(CuArray, wk0) using KernelAbstractions: get_backend @@ -39,50 +40,6 @@ t_e = get_backend(wk.x0) using KernelAbstractions using StaticArrays -@kernel function ggg(r, dm, x, φ) -# @kernel function fun(dm, r, x) - - - n = @index(Global, Linear) - ind = @index(Global, Cartesian) - (i,j) = ind.I - - - # k = length(dm.grid.grids[1]) - # i_ = (n-1)÷k - # j_ = (n-1) - (i)*κ - # i = i_+1 - # j = j_+1 - - # TODO: special function here - s_ = dm.grid[n] - s = Dolo.QP((i,j), s_) - - xx = x[n] - - # (i,j) = @inline Dolo.from_linear(model.grid, n) - - rr = Dolo.F(dm, s, xx, φ) - - r[n] = rr - -end - -using BenchmarkTools - -fun_ = ggg(t_e) - -@time fun_(wk.r0, dm32, wk.x0, wk.φ; ndrange=(p,q)) - - -@benchmark fun_(wk.r0, dm32, wk.x0, wk.φ; ndrange=(p,q)) - - -@benchmark Dolo.F!(wk0.r0, dm32, wk0.x0, wk0.φ) - - -Dolo.distance(wk0.r0, adapt(Array,wk.r0)) - # try @@ -91,16 +48,13 @@ Dolo.distance(wk0.r0, adapt(Array,wk.r0)) # nothing # end +wk0 = Dolo.time_iteration_workspace(dm32) +wk = adapt(CuArray, wk0) +@time Dolo.time_iteration(dm32, wk, verbose=false; engine=:gpu); - -Dolo.time_iteration(dm32, wk; engine=:gpu) - - -s_0 = dm32.grid[10] -s0 = Dolo.QP((2,2), s_0) -xx0 = wk0.x0[10] +@time Dolo.time_iteration(dm32, wk0, verbose=false; engine=:gpu); -fon(dm32, s0, xx0) = sum(w*S.val for (w,S) in Dolo.τ(dm32, s0, xx0)) +Dolo.distance(adapt(Array,wk.x0) , wk0.x0) -@code_warntype fon(dm32, s0, xx0) \ No newline at end of file +# 357 times faster ! \ No newline at end of file diff --git a/src/adapt.jl b/src/adapt.jl index 10362ef8..42ca3d1c 100644 --- a/src/adapt.jl +++ b/src/adapt.jl @@ -34,3 +34,6 @@ end import KernelAbstractions: get_backend get_backend(g::GArray) = get_backend(g.data) + +import CUDA: CuArray +distance(x::GVector{G, A}, y::GVector{G,A}) where G where A<:CuArray = Base.mapreduce(u->maximum(u), max, x.data-y.data) \ No newline at end of file diff --git a/src/algos/time_iteration_accelerated.jl b/src/algos/time_iteration_accelerated.jl index a4ca41f4..e1d9f125 100644 --- a/src/algos/time_iteration_accelerated.jl +++ b/src/algos/time_iteration_accelerated.jl @@ -39,23 +39,30 @@ using KernelAbstractions # This is for a PGrid model only function F!(r, model, x, φ, engine) - @kernel function FF_(r, @Const(model), @Const(x), @Const(φ)) - + @kernel function FF_(r, @Const(dm), @Const(x), @Const(φ)) n = @index(Global, Linear) - (i,j) = Dolo.from_linear(model.grid, n) - - s_ = model.grid[n] - s = QP((i,j), s_) - xx = x[n] - - rr = sum( - w*Dolo.arbitrage(model,s,xx,S,φ(S)) - for (w,S) in Dolo.τ(model, s, xx) - ) + ind = @index(Global, Cartesian) + (i,j) = ind.I + + # k = length(dm.grid.grids[1]) + # i_ = (n-1)÷k + # j_ = (n-1) - (i)*κ + # i = i_+1 + # j = j_+1 - r[i,j] = rr - + # TODO: special function here + s_ = dm.grid[n] + s = Dolo.QP((i,j), s_) + + xx = x[n] + + # (i,j) = @inline Dolo.from_linear(model.grid, n) + + rr = Dolo.F(dm, s, xx, φ) + + r[n] = rr + end fun_cpu = FF_(engine) @@ -79,28 +86,36 @@ end -function dF_1!(out, model, controls::GArray, φ::Union{GArray, DFun}, ::CPU) +function dF_1!(out, model, controls::GArray, φ::Union{GArray, DFun}, engine) - @kernel function FF_(r,@Const(model), @Const(x),@Const(φ) ) + @kernel function FF_(r,@Const(dm), @Const(x),@Const(φ) ) - # c = @index(Global, Cartesian) - # i,j = c.I n = @index(Global, Linear) - # i,j = c.I - (i,j) = Dolo.from_linear(model.grid, n) - - s_ = model.grid[n] - s = QP((i,j), s_) - xx = x[i,j] + ind = @index(Global, Cartesian) + (i,j) = ind.I + + # k = length(dm.grid.grids[1]) + # i_ = (n-1)÷k + # j_ = (n-1) - (i)*κ + # i = i_+1 + # j = j_+1 + # TODO: special function here + s_ = dm.grid[n] + s = Dolo.QP((i,j), s_) + + xx = x[n] + + # (i,j) = @inline Dolo.from_linear(model.grid, n) + rr = ForwardDiff.jacobian(u->F(model, s, u, φ), xx) - - r[i,j] = rr + + r[n] = rr end - fun_cpu = FF_(CPU()) + fun_cpu = FF_(engine) if typeof(model.grid)<:CGrid p = model.grid.ranges[1][3] From f228dec33f317189ce3e3b082eb9c773a84080f1 Mon Sep 17 00:00:00 2001 From: Pablo Winant Date: Tue, 28 May 2024 16:34:45 +0200 Subject: [PATCH 09/17] GPUfied splines 1/2 --- Project.toml | 3 +- examples/ymodels/rbc_mc.jl | 3 +- misc/dev_float32.jl | 71 +++++++--- src/adapt.jl | 13 +- src/algos/time_iteration.jl | 36 +++-- src/algos/time_iteration_accelerated.jl | 179 ++++++++++++++---------- src/dev_L2.jl | 53 +++++-- src/garray.jl | 8 +- src/grids.jl | 1 + src/splines/interp.jl | 6 +- src/splines/macros.jl | 30 +++- 11 files changed, 260 insertions(+), 143 deletions(-) diff --git a/Project.toml b/Project.toml index fbd43119..31e1a6b2 100644 --- a/Project.toml +++ b/Project.toml @@ -11,7 +11,6 @@ BlockDiagonals = "0a1fb500-61f7-11e9-3c65-f5ef3456f9f0" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" Continuables = "79afa230-ca09-11e8-120b-5decf7bf5e25" Crayons = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f" -Cthulhu = "f68482b8-f384-11e8-15f7-abe071a5a75f" DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" @@ -30,7 +29,6 @@ LabelledArrays = "2ee39098-c373-598a-b85f-a56591580800" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" LinearMaps = "7a12625a-238d-50fd-b39a-03d52299707e" LinearOperators = "5c8ed15e-5a4c-59e4-a42b-c7e8811fb125" -LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890" MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" NLsolve = "2774e3e8-f4cf-5e23-947b-6d7e65073b56" Optim = "429524aa-4258-5aef-a3af-852621145aeb" @@ -47,3 +45,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" oneAPI = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b" + diff --git a/examples/ymodels/rbc_mc.jl b/examples/ymodels/rbc_mc.jl index a27ae097..c5ec190e 100644 --- a/examples/ymodels/rbc_mc.jl +++ b/examples/ymodels/rbc_mc.jl @@ -1,6 +1,6 @@ using StaticArrays -import Dolo: transition, arbitrage +import Dolo: transition, arbitrage, intermediate model = let @@ -87,6 +87,7 @@ function arbitrage(model::typeof(model), s::NamedTuple, x::NamedTuple, S::NamedT y = intermediate(model, s, x) Y = intermediate(model, S, X) + res_1 = p.χ*(x.n^p.η)*(y.c^p.σ) - y.w res_2 = (p.β*(y.c/Y.c)^p.σ)*(1 - p.δ + Y.rk) - 1 diff --git a/misc/dev_float32.jl b/misc/dev_float32.jl index 24be933e..801bdb2f 100644 --- a/misc/dev_float32.jl +++ b/misc/dev_float32.jl @@ -6,41 +6,57 @@ model_32 = include("$(root_dir)/misc/rbc_float32.jl") model32 = Dolo.convert_precision(Float32, model_32) -dm32 = Dolo.discretize(model32, Dict(:endo=>[100000]) ) - +dm32 = Dolo.discretize(model32, Dict(:endo=>[1000]) ) typeof(dm32) -using Adapt +using Adapt import oneAPI: oneArray import Adapt: adapt_structure -import CUDA: CuArray +# import CUDA: CuArray -wk0 = Dolo.time_iteration_workspace(dm32) +wk0 = Dolo.time_iteration_workspace(dm32); -wk = Dolo.time_iteration_workspace(dm32, dest=CuArray) +# wk = Dolo.time_iteration_workspace(dm32, dest=CuArray) +wk = Dolo.time_iteration_workspace(dm32, dest=oneArray); + + +wk = adapt(oneArray, wk0) +# wk = adapt(CuArray, wk0) -# wk = adapt(oneArray, wk0) -wk = adapt(CuArray, wk0) using KernelAbstractions: get_backend -if typeof(dm32.grid)<:Dolo.CGrid - p = dm32.grid.ranges[1][3] - q = dm32.grid.ranges[2][3] -else - p = length(dm32.grid.grids[1]) - q = length(dm32.grid.grids[2]) -end t_e = get_backend(wk.x0) -using KernelAbstractions +using KernelAbstractions: CPU using StaticArrays +L0 = Dolo.dF_2(dm32, wk0.x0*0, wk0.φ) +L0_gpu = adapt(oneArray, L0) + +L = deepcopy(L0) + +@time Dolo.dF_2!(L, dm32, wk0.x0, wk0.φ, CPU()) + +# @time Dolo.dd_2!(L, dm32, wk0.x0, wk0.φ, CPU()) + + +@time Dolo.dF_2!(L0_gpu, dm32, wk.x0, wk.φ, t_e) + +Lg = adapt(Array, L0_gpu) + + +@time L*wk0.x0; + +#It works ! +using BenchmarkTools +@benchmark Dolo.mul!(wk0.r0, L, wk0.x0, CPU()) +@benchmark Dolo.mul!(wk.r0, L0_gpu, wk.x0, t_e) # try # res = fun_(wk.r0, dm32, wk.x0, wk.φ; ndrange=(p,q)) @@ -48,13 +64,24 @@ using StaticArrays # nothing # end -wk0 = Dolo.time_iteration_workspace(dm32) -wk = adapt(CuArray, wk0) -@time Dolo.time_iteration(dm32, wk, verbose=false; engine=:gpu); +wk0 = Dolo.time_iteration_workspace(dm32; interp_mode=:cubic) +# wk = adapt(CuArray, wk0) +wk = adapt(oneArray, wk0) + +@time Dolo.time_iteration(dm32, wk, verbose=true, improve=true; engine=:gpu, improve_wait=10, improve_K=500, T=20); + + + + +@time Dolo.time_iteration(dm32, wk0, verbose=true, improve=false; T=20); + + + -@time Dolo.time_iteration(dm32, wk0, verbose=false; engine=:gpu); +using ForwardDiff +ForwardDiff.derivative(u->unsafe_trunc(u), 0.1) -Dolo.distance(adapt(Array,wk.x0) , wk0.x0) +ForwardDiff.jacobian(u->trunc.(Int, u), SVector(0.3, 0.2)) -# 357 times faster ! \ No newline at end of file +ForwardDiff.jacobian(u->unsafe_trunc.(Int, u), SVector(0.3, 0.2)) diff --git a/src/adapt.jl b/src/adapt.jl index 42ca3d1c..90702d08 100644 --- a/src/adapt.jl +++ b/src/adapt.jl @@ -31,9 +31,14 @@ function adapt_structure(to, f::DFun{Dom, Gar, Itp, vars}) where Dom where Gar w ) end -import KernelAbstractions: get_backend +function adapt_structure(to, L::LL{G,D,F}) where G where D where F + LL(L.grid, adapt(to, L.D), adapt(to, L.φ)) +end + -get_backend(g::GArray) = get_backend(g.data) +# should it be merged with the general definition? +# import CUDA: CuArray +# distance(x::GVector{G, A}, y::GVector{G,A}) where G where A<:CuArray = Base.mapreduce(u->maximum(u), max, x.data-y.data) -import CUDA: CuArray -distance(x::GVector{G, A}, y::GVector{G,A}) where G where A<:CuArray = Base.mapreduce(u->maximum(u), max, x.data-y.data) \ No newline at end of file +import oneAPI: oneArray +distance(x::GVector{G, A}, y::GVector{G,A}) where G where A<:oneArray = Base.mapreduce(u->maximum(u), max, x.data-y.data) \ No newline at end of file diff --git a/src/algos/time_iteration.jl b/src/algos/time_iteration.jl index 95a6334c..efce75fe 100644 --- a/src/algos/time_iteration.jl +++ b/src/algos/time_iteration.jl @@ -33,7 +33,7 @@ function F(dmodel::M, s::QP, x::SVector{d,T}, φ::Union{Policy, GArray, DFun}) w # ) r # r::SVector{d,T} - r = complementarities(dmodel.model, s,x,r) + # r = complementarities(dmodel.model, s,x,r) r end @@ -128,7 +128,7 @@ end using LinearMaps using Adapt -function time_iteration_workspace(dmodel; interp_mode=:linear, dest=Array) +function time_iteration_workspace(dmodel; interp_mode=:linear, improve=false, dest=Array) T = eltype(dmodel) @@ -146,13 +146,14 @@ function time_iteration_workspace(dmodel; interp_mode=:linear, dest=Array) vars = variables(dmodel.model.controls) φ = DFun(dmodel.model.states, x0, vars; interp_mode=interp_mode) - tt = (;x0, x1, x2, r0, dx, J, φ) + # if improve + L = Dolo.dF_2(dmodel, x1, φ) + tt = (;x0, x1, x2, r0, dx, J, L, φ) + # else + # tt = (;x0, x1, x2, r0, dx, J, φ) + # end - res = NamedTuple( - ( (k=>adapt(dest, v)) for (k,v) in zip(keys(tt),values(tt)) ) - ) - return res - # return adapt(dest, tt) + return adapt(dest, tt) end @@ -174,6 +175,7 @@ function time_iteration(model::YModel; kwargs...) time_iteration(dmodel, wksp; kwargs2...) end +using oneAPI: oneArray function time_iteration(model::DYModel, workspace=time_iteration_workspace(model); T=500, @@ -199,14 +201,18 @@ function time_iteration(model::DYModel, # mem = typeof(workspace) <: Nothing ? time_iteration_workspace(model) : workspace mbsteps = 5 - lam = 0.5 + + (;x0, x1, x2, dx, r0, J, φ) = workspace + local η_0 = NaN local η convergence = false iterations = T - (;x0, x1, x2, dx, r0, J, φ) = workspace + Tf = eltype(eltype(x0)) + + lam = convert(Tf, 0.5) if engine in (:cpu, :gpu) t_engine = get_backend(workspace.x0) @@ -217,7 +223,7 @@ function time_iteration(model::DYModel, ti_trace = trace ? IterationTrace(typeof(φ)[]) : nothing if improve - J_2 = Dolo.dF_2(model, x1, φ) + J_2 = workspace.L end for t=1:T @@ -306,12 +312,16 @@ function time_iteration(model::DYModel, Dolo.dF_1!(J_1, model, x1, φ, t_engine) Dolo.dF_2!(J_2, model, x1, φ, t_engine) - mul!(J_2, -1.0) + mul!(J_2, convert(Tf,-1.0)) + # J_2.M_ij[:] *= -1.0 Tp = J_1 \ J_2 d = (x1-x0) - x0.data .+= neumann(Tp, d; K=improve_K) + + rhs = neumann(Tp, d; K=improve_K) + + x0.data .+= rhs.data else x0.data .= x1.data diff --git a/src/algos/time_iteration_accelerated.jl b/src/algos/time_iteration_accelerated.jl index e1d9f125..9f565672 100644 --- a/src/algos/time_iteration_accelerated.jl +++ b/src/algos/time_iteration_accelerated.jl @@ -1,40 +1,13 @@ # This starts making a difference when grids have more than 200000 points... using KernelAbstractions +import KernelAbstractions.Extras: @unroll +import KernelAbstractions: get_backend +get_backend(g::GArray) = get_backend(g.data) -# # This is for a CGrid model only -# function F!(r, model, x, φ, ::CPU) - -# @kernel function FF_(r, @Const(model), @Const(x), @Const(φ)) - -# i = @index(Global, Linear) - -# s_ = model.grid[i] -# s = QP((i,), s_) -# xx = x[i] - -# rr = sum( -# w*Dolo.arbitrage(model,s,xx,S,φ(S)) -# for (w,S) in Dolo.τ(model, s, xx) -# ) - -# r[i] = rr - -# end - -# fun_cpu = FF_(CPU()) - - -# p = length(model.grid) - -# # p,q = size(x) - -# res = fun_cpu(r, model, x, φ; ndrange=(p,)) -# wait(res) -# end # This is for a PGrid model only function F!(r, model, x, φ, engine) @@ -42,6 +15,7 @@ function F!(r, model, x, φ, engine) @kernel function FF_(r, @Const(dm), @Const(x), @Const(φ)) n = @index(Global, Linear) + ind = @index(Global, Cartesian) (i,j) = ind.I @@ -68,19 +42,10 @@ function F!(r, model, x, φ, engine) fun_cpu = FF_(engine) - # p = length(model.grid.grids[1]) - # q = length(model.grid.grids[2]) - # p,q = size(x) - if typeof(model.grid)<:CGrid - p = model.grid.ranges[1][3] - q = model.grid.ranges[2][3] - else - p = length(model.grid.grids[1]) - q = length(model.grid.grids[2]) - end - - res = fun_cpu(r, model, x, φ; ndrange=(p,q)) - # wait(res) + sz = size(model.grid) + + res = fun_cpu(r, model, x, φ; ndrange=sz) + end @@ -117,62 +82,91 @@ function dF_1!(out, model, controls::GArray, φ::Union{GArray, DFun}, engine) fun_cpu = FF_(engine) - if typeof(model.grid)<:CGrid - p = model.grid.ranges[1][3] - q = model.grid.ranges[2][3] - else - p = length(model.grid.grids[1]) - q = length(model.grid.grids[2]) - end - # p,q = size(out) + sz = size(model.grid) - res = fun_cpu(out, model, controls, φ; ndrange=(p,q)) + res = fun_cpu(out, model, controls, φ; ndrange=sz) # wait(res) end #### no alloc +function dF_2!(out, dmodel, controls::GArray, φ::DFun, engine) -function dF_2!(L, dmodel, xx::GArray, φ::DFun, ::CPU) + @kernel function FF_(L,@Const(dm), @Const(x),@Const(φ) ) - # for (n,(s,x)) in enumerate(zip(Dolo.enum(dmodel.grid), xx)) - Threads.@threads for n=1:length(dmodel.grid) - i,j = Dolo.from_linear(dmodel.grid, n) - - s_ = dmodel.grid[i,j] - s = QP((i,j), s_) - x = xx[n] - - L.D[n] = tuple( - ( - (; - F_x=w*ForwardDiff.jacobian(u->Dolo.arbitrage(dmodel,s,x,S,u), φ(S)), - S=S - ) - for (w,S) in Dolo.τ(dmodel, s, x) + n = @index(Global, Linear) + ind = @index(Global, Cartesian) + (i,j) = ind.I + + # TODO: special function here + s_ = dm.grid[n] + s = Dolo.QP((i,j), s_) + + xx = x[n] + + tt = tuple( + ( + (; + F_x=w*ForwardDiff.jacobian(u->Dolo.arbitrage(dmodel,s,xx,S,u), φ(S)), + S=S ) + for (w,S) in Dolo.τ(dmodel, s, xx) + ) ...) + L.D[n] = tt + end + fun_cpu = FF_(engine) + sz = size(dmodel.grid) + + res = fun_cpu(out, dmodel, controls, φ; ndrange=sz) + nothing + end -using KernelAbstractions -import KernelAbstractions.Extras: @unroll -function mul!(dr, L2::Dolo.LL, x, ::CPU) +# function dF_2!(L, dmodel, xx::GArray, φ::DFun, ::CPU) + +# # for (n,(s,x)) in enumerate(zip(Dolo.enum(dmodel.grid), xx)) +# Threads.@threads for n=1:length(dmodel.grid) + +# i,j = Dolo.from_linear(dmodel.grid, n) + +# s_ = dmodel.grid[i,j] +# s = QP((i,j), s_) +# x = xx[n] + +# L.D[n] = tuple( +# ( +# (; +# F_x=w*ForwardDiff.jacobian(u->Dolo.arbitrage(dmodel,s,x,S,u), φ(S)), +# S=S +# ) +# for (w,S) in Dolo.τ(dmodel, s, x) +# ) +# ...) + +# end + +# nothing + + +# end + +function mul!(dr, L2::Dolo.LL, x, engine) D = L2.D dφ = L2.φ fit!(dφ, x) - - @kernel function tm_(dr, @Const(L2), @Const(x)) - n = @index(Global) + @kernel function tm_(dr, @Const(D), @Const(dφ)) + n = @index(Global, Linear) t0 = zero(eltype(dr)) @unroll for k=1:length(D[n]) # @unroll provides a 20% gain (;F_x, S) = D[n][k] # profile: slow ? @@ -181,11 +175,42 @@ function mul!(dr, L2::Dolo.LL, x, ::CPU) dr[n] = t0 end - fun = tm_(CPU()) + fun = tm_(engine) K = length(D) - res = fun(dr, L2, x; ndrange=K) + res = fun(dr, D, dφ; ndrange=K) + + synchronize(engine) # wait(res) - res + nothing + +end + + +function \(J, L::Dolo.LL) + + + @kernel function tm_(@Const(J), L) + n = @index(Global) + + elt = L.D[n] + M = J.data[n] + tt = tuple(( + (;F_x=M\d.F_x, S=d.S) + for d in elt + )...) + L.D[n] = tt + + end + + engine = get_backend(J.data) + fun = tm_(engine) + + K = length(J.data) + + nL = deepcopy(L) + res = fun(J, nL; ndrange=K) + + return nL end diff --git a/src/dev_L2.jl b/src/dev_L2.jl index 39c9b997..9ab5f68d 100644 --- a/src/dev_L2.jl +++ b/src/dev_L2.jl @@ -12,18 +12,37 @@ const LF = LL import Base: /,* import LinearAlgebra: mul! +# function mul!(L::LL, x::Number) + +# for n in 1:length(L.D) +# L.D[n] =tuple(( +# (;F_x=x*d.F_x, S=d.S) +# for d in L.D[n] +# )...) +# end +# end + function mul!(L::LL, x::Number) - for n in 1:length(L.D) - L.D[n] =tuple(( + map!( + e->tuple(( (;F_x=x*d.F_x, S=d.S) - for d in L.D[n] - )...) - end + for d in e + )...), + L.D, + L.D + ) + # for n in 1:length(L.D) + # L.D[n] =tuple(( + # (;F_x=x*d.F_x, S=d.S) + # for d in L.D[n] + # )...) + # end end function mul!(dr, L2::LF, x) - mul!(dr, L2::LF, x, CPU()) + engine=get_backend(L2.D) + mul!(dr, L2::LF, x, engine) end @@ -45,10 +64,13 @@ end function *(L::LL, x0) - r = deepcopy(x0) - r.data .*= 0.0 + + # BUG: with deepcopy, doesn' t work + r = duplicate(x0) ###### BUG: this doesn't wokr on the GPU + Tf = getprecision(x0) + r.data .*= convert(Tf, 0.0) mul!(r, L, x0) - return r + end # this takes 0.2 s ! @@ -127,9 +149,14 @@ end function neumann(L2::LF, r0; K=1000, τ_η=1e-10) - dx = deepcopy(r0) - du = deepcopy(r0) - dv = deepcopy(r0) + # TODO + # dx = deepcopy(r0) + # du = deepcopy(r0) + # dv = deepcopy(r0) + + dx = duplicate(r0) + du = duplicate(r0) + dv = duplicate(r0) mem = (;du, dv) @@ -139,7 +166,7 @@ function neumann(L2::LF, r0; K=1000, τ_η=1e-10) end -function neumann!(dx, L2::LF, r0, mem=(;du=deepcopy(r0), dv=deepcopy(r0)); K=1000, τ_η=1e-10) +function neumann!(dx, L2::LF, r0, mem=(;du=duplicate(r0), dv=duplicate(r0)); K=1000, τ_η=1e-10) (; du, dv) = mem diff --git a/src/garray.jl b/src/garray.jl index 07648185..66074419 100644 --- a/src/garray.jl +++ b/src/garray.jl @@ -39,7 +39,7 @@ function setindex!(a::GArray{PGrid{G1, G2, d}, T}, v, i::Int, j::Int) where G1 w nothing end - +getprecision(g::GArray{G,T}) where G where T = eltype(g).types[1].types[1] eltype(g::GArray{G,T}) where G where T = eltype(T) @@ -142,3 +142,9 @@ function Base.convert(::Type{Matrix}, A::GArray{G,Vector{T}}) where G where T <: end return M end + + + +### TODO + +duplicate(g::GArray{G,T}) where G where T = GArray(g.grid, deepcopy(g.data)) \ No newline at end of file diff --git a/src/grids.jl b/src/grids.jl index c4062159..0200e742 100644 --- a/src/grids.jl +++ b/src/grids.jl @@ -111,6 +111,7 @@ end @inline to__linear_index(g::PGrid, ind::Tuple{Int64, Int64}) = ind[1] + length(g.grids[1])*(ind[2]-1) +size(g::PGrid) = tuple((length(g) for g in g.grids)...) show(io::IO, g::SGrid{d1, d2}) where d1 where d2 = print(io, "SGrid{$(d1)}") show(io::IO, g::CGrid{d}) where d = let diff --git a/src/splines/interp.jl b/src/splines/interp.jl index c68a8f1d..725b38ce 100644 --- a/src/splines/interp.jl +++ b/src/splines/interp.jl @@ -1,8 +1,8 @@ using StaticArrays -import LoopVectorization: VectorizationBase import Base: getindex -getindex(A::Vector{Tf}, i::VectorizationBase.Vec{4,Int64}) where Tf = VectorizationBase.Vec{4, Tf}(A[i(1)], A[i(2)], A[i(3)], A[i(4)]) +# import LoopVectorization: VectorizationBase +# getindex(A::Vector{Tf}, i::VectorizationBase.Vec{4,Int64}) where Tf = VectorizationBase.Vec{4, Tf}(A[i(1)], A[i(2)], A[i(3)], A[i(4)]) # ## TODO : rewrite the following @@ -13,7 +13,7 @@ getindex(A::Vector{Tf}, i::VectorizationBase.Vec{4,Int64}) where Tf = Vectorizat return sum( xx .* v[:]) end -@inline function reduce_tensors(λ::SVector{2,U}, M::SArray{T,V,2,W}) where d1 where d2 where T where U where V where W +@inline function reduce_tensors(λ::SVector{2,U}, M::SArray{T,V,2,W}) where T where U where V where W v1 = SVector(1-λ[1],λ[1]) v2 = SVector(1-λ[2],λ[2]) return M[1,1]*v1[1]*v2[1] + M[1,2]*v1[1]*v2[2] + M[2,1]*v1[2]*v2[1] + M[2,2]*v1[2]*v2[2] diff --git a/src/splines/macros.jl b/src/splines/macros.jl index 2629ae3f..5e354d23 100644 --- a/src/splines/macros.jl +++ b/src/splines/macros.jl @@ -160,11 +160,12 @@ function create_local_parameters(d; vectorize=true, Tf=Float64) lines = [] if vectorize for i=1:d + bl = quote $(U("x",i)) = S[n][$i] $(U("u",i)) = ($(U("x",i)) - $(U("start",i)))*$(U("dinv",i)) # $(U("i",i)) = (floor(Int,$(U("u",i)) )) - $(U("i",i)) = unsafe_trunc(Int32, $(U("u",i))) + $(U("i",i)) = unsafe_trunc(Int, $(U("u",i))) $(U("i",i)) = max( min($(U("i",i)),$(U("M",i))-2), 0 ) $(U("t",i)) = $(U("u",i))-$(U("i",i)) $(U("tp",i,1)) = $(U("t",i))*$(U("t",i))*$(U("t",i)) @@ -178,14 +179,29 @@ function create_local_parameters(d; vectorize=true, Tf=Float64) end else lines = [] + e = quote + x_ = SVector(S...) + a_ = SVector( a... ) + b_ = SVector( b... ) + n_ = SVector( orders... ) + δ_ = (b_.-a_)./(n_.-1) + i_ = div.( (x_.-a_), δ_) + i_ = max.(min.(i_, n_.-2), 0) + λ_ = (x_.-(a_ .+ δ_.*i_))./δ_ + ii_ = unsafe_trunc.(Int,i_) + + end + for x in e.args + push!(lines, x) + end for i=1:d bl = quote - $(U("x",i)) = S[$i] - $(U("u",i)) = ($(U("x",i)) - $(U("start",i)))*$(U("dinv",i)) - # $(U("i",i)) = (floor(Int,$(U("u",i)) )) - $(U("i",i)) = unsafe_trunc(Int32, $(U("u",i)) ) - $(U("i",i)) = max( min($(U("i",i)),$(U("M",i))-2), 0 ) - $(U("t",i)) = $(U("u",i))-$(U("i",i)) + + + # compat + $(U("i",i)) = ii_[$i] + $(U("t",i)) = λ_[$i] + $(U("tp",i,1)) = $(U("t",i))*$(U("t",i))*$(U("t",i)) $(U("tp",i,2)) = $(U("t",i))*$(U("t",i)) $(U("tp",i,3)) = $(U("t",i)) From 2f6d73ec67f8ce6ca550b694a7bf05403c060fbe Mon Sep 17 00:00:00 2001 From: Pablo Winant Date: Sun, 2 Jun 2024 00:01:43 +0200 Subject: [PATCH 10/17] WIP --- Project.toml | 2 +- misc/dev_float32.jl | 86 ++++++++++++-------- src/splines/cubic_prefilter.jl | 144 ++++++++++++++++++++++++--------- src/splines/splines.jl | 2 +- 4 files changed, 160 insertions(+), 74 deletions(-) diff --git a/Project.toml b/Project.toml index 31e1a6b2..462d832c 100644 --- a/Project.toml +++ b/Project.toml @@ -20,6 +20,7 @@ FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" Formatting = "59287772-0a20-5a39-b81b-1366585eb4c0" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" +GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" @@ -45,4 +46,3 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" oneAPI = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b" - diff --git a/misc/dev_float32.jl b/misc/dev_float32.jl index 801bdb2f..719405f2 100644 --- a/misc/dev_float32.jl +++ b/misc/dev_float32.jl @@ -16,72 +16,92 @@ import oneAPI: oneArray import Adapt: adapt_structure # import CUDA: CuArray +import Dolo wk0 = Dolo.time_iteration_workspace(dm32); # wk = Dolo.time_iteration_workspace(dm32, dest=CuArray) -wk = Dolo.time_iteration_workspace(dm32, dest=oneArray); +@time wk = Dolo.time_iteration_workspace(dm32, dest=oneArray; interp_mode=:cubic); -wk = adapt(oneArray, wk0) -# wk = adapt(CuArray, wk0) +time_iteration(dm32, wk0; engine=:gpu, tol_η=1e-5, improve_wait=3, improve=true) +time_iteration(dm32, wk; engine=:gpu, tol_η=1e-5, improve_wait=3, improve=true) -using KernelAbstractions: get_backend -t_e = get_backend(wk.x0) -using KernelAbstractions: CPU -using StaticArrays +# list available options +# @time wk = Dolo.time_iteration_workspace(dm32, dest=Array; interp_mode=:cubic); -L0 = Dolo.dF_2(dm32, wk0.x0*0, wk0.φ) -L0_gpu = adapt(oneArray, L0) +# @time Dolo.splines.prefilter!(c0, Val(:CPU)) +# @time Dolo.splines.prefilter!(c0, Val(:Threads)) +# @time Dolo.splines.prefilter!(c0, Val(:KA)) -L = deepcopy(L0) +# @time Dolo.splines.prefilter!(c0) +# @time Dolo.splines.prefilter!(c1,Val(:KA)) -@time Dolo.dF_2!(L, dm32, wk0.x0, wk0.φ, CPU()) +# @time Dolo.splines.prefilter!(c,Val(:KA)) -# @time Dolo.dd_2!(L, dm32, wk0.x0, wk0.φ, CPU()) +# c_gpu = adapt(Array, c) +# # Dolo.splines.prefilter!(c) -@time Dolo.dF_2!(L0_gpu, dm32, wk.x0, wk.φ, t_e) -Lg = adapt(Array, L0_gpu) +# t_e = get_backend(wk.x0) -@time L*wk0.x0; +# using KernelAbstractions: CPU +# using StaticArrays -#It works ! -using BenchmarkTools -@benchmark Dolo.mul!(wk0.r0, L, wk0.x0, CPU()) -@benchmark Dolo.mul!(wk.r0, L0_gpu, wk.x0, t_e) -# try - # res = fun_(wk.r0, dm32, wk.x0, wk.φ; ndrange=(p,q)) -# catch err - # nothing -# end +# L0 = Dolo.dF_2(dm32, wk0.x0*0, wk0.φ) +# L0_gpu = adapt(oneArray, L0) -wk0 = Dolo.time_iteration_workspace(dm32; interp_mode=:cubic) -# wk = adapt(CuArray, wk0) -wk = adapt(oneArray, wk0) +# L = deepcopy(L0) -@time Dolo.time_iteration(dm32, wk, verbose=true, improve=true; engine=:gpu, improve_wait=10, improve_K=500, T=20); +# @time Dolo.dF_2!(L, dm32, wk0.x0, wk0.φ, CPU()) +# # @time Dolo.dd_2!(L, dm32, wk0.x0, wk0.φ, CPU()) +# @time Dolo.dF_2!(L0_gpu, dm32, wk.x0, wk.φ, t_e) -@time Dolo.time_iteration(dm32, wk0, verbose=true, improve=false; T=20); +# Lg = adapt(Array, L0_gpu) +# @time L*wk0.x0; +# #It works ! +# using BenchmarkTools +# @benchmark Dolo.mul!(wk0.r0, L, wk0.x0, CPU()) +# @benchmark Dolo.mul!(wk.r0, L0_gpu, wk.x0, t_e) +# # try -using ForwardDiff +# # res = fun_(wk.r0, dm32, wk.x0, wk.φ; ndrange=(p,q)) +# # catch err +# # nothing +# # end -ForwardDiff.derivative(u->unsafe_trunc(u), 0.1) +# wk0 = Dolo.time_iteration_workspace(dm32; interp_mode=:cubic) +# # wk = adapt(CuArray, wk0) +# wk = adapt(oneArray, wk0) -ForwardDiff.jacobian(u->trunc.(Int, u), SVector(0.3, 0.2)) +# @time Dolo.time_iteration(dm32, wk, verbose=true, improve=true; engine=:gpu, improve_wait=10, improve_K=500, T=20); -ForwardDiff.jacobian(u->unsafe_trunc.(Int, u), SVector(0.3, 0.2)) + + + +# @time Dolo.time_iteration(dm32, wk0, verbose=true, improve=false; T=20); + + + + +# using ForwardDiff + +# ForwardDiff.derivative(u->unsafe_trunc(u), 0.1) + +# ForwardDiff.jacobian(u->trunc.(Int, u), SVector(0.3, 0.2)) + +# ForwardDiff.jacobian(u->unsafe_trunc.(Int, u), SVector(0.3, 0.2)) diff --git a/src/splines/cubic_prefilter.jl b/src/splines/cubic_prefilter.jl index 34c20319..e6ff2f74 100644 --- a/src/splines/cubic_prefilter.jl +++ b/src/splines/cubic_prefilter.jl @@ -87,12 +87,19 @@ end # end -const zq = sqrt(3.0)-2.0 -const λ = 6.0 +const zq_0 = sqrt(3.0)-2.0 +const λ_0 = 6.0 +getprecision(g::AbstractArray{T}) where T<:Number = T +getprecision(g::AbstractArray{T}) where T<:SVector = eltype(g).types[1].types[1] function prefilter!(c::AbstractVector{T}) where T + + + prec = getprecision(c) + λ = convert(prec, λ_0) + zq = convert(prec, zq_0) N = length(c) Horizon = (min(20, N-2)) @@ -101,7 +108,7 @@ function prefilter!(c::AbstractVector{T}) where T ff = c[N-1] t1 = zero(eltype(c)) #0.0 - zn = 1.0 + zn = convert(prec,1.0) for n in 0:Horizon t1 += λ*(2*f1-c[2+n])*zn zn *= zq @@ -126,59 +133,41 @@ function prefilter!(c::AbstractVector{T}) where T end +function prefilter!(data::AbstractArray{T,2}, ::Val{:CPU}) where T -# function prefilter!(data::AbstractVector{T}) where T - -# # TODO: find a way to avoid allocation - -# # data is 1d array of length M -# # data is 1d array of length M+2 -# M = length(data) - 2 -# bands = zeros(M+2, 3) - -# bb = zeros(T, M+2) - -# # fill_bands!(M, bands, bb, data) -# prefilter!(data, bands, bb) - -# end - -# function prefilter!(data, bands, bb) - -# M = length(data) - 2 -# fill_bands!(M, bands, bb, data) -# solve_coefficients!(bands, bb, data) - -# end - + I,J = size(data) + M = J-2 + for i=1:I + dat = view(data, i, :) + prefilter!(dat) + end -# function prefilter!(data::Array{T,1}) where T -# M = size(data,1)-2 -# bands = zeros(M+2,3) -# bb = zeros(T,M+2) -# fill_bands!(M, bands, bb, data) -# prefilter!(data, bands, bb) -# end + M = I-2 + for j=1:J + dat = view(data, :, j) + prefilter!(dat) + end +end -function prefilter!(data::AbstractArray{T,2}) where T +function prefilter!(data::AbstractArray{T,2}, ::Val{:Threads}) where T I,J = size(data) M = J-2 - for i=1:I + Threads.@threads for i=1:I dat = view(data, i, :) prefilter!(dat) end M = I-2 - for j=1:J + Threads.@threads for j=1:J dat = view(data, :, j) prefilter!(dat) end end -function prefilter!(data::Array{T,3}) where T +function prefilter!(data::AbstractArray{T,3}) where T I,J,K = size(data) @@ -209,7 +198,7 @@ function prefilter!(data::Array{T,3}) where T end -function prefilter!(data::Array{T,4}) where T +function prefilter!(data::AbstractArray{T,4}) where T I,J,K,L = size(data) M = L-2 @@ -250,3 +239,80 @@ function prefilter!(data::Array{T,4}) where T end end end + +#### + +function prefilter!(data::AbstractArray{T,1}, ::Val{:CPU}) where T + + prefilter!(data) + +end + +using KernelAbstractions: @kernel + + +function prefilter!(data::AbstractArray{T,1}, ::Val{:KA}) where T + + @kernel function ker(m) + + prefilter!(m) + + end + + backend = get_backend(data) + + fun = ker(backend) + + fun(data, ndrange=(1,)) + +end + +using KernelAbstractions + + +using KernelAbstractions: CPU + + +function prefilter!(data::AbstractArray{T,2}, ::Val{:KA}) where T + + + @kernel function ker_1(m) + + i = @index(Global, Linear) + + dat = view(m, i, :) + prefilter!(dat) + + end + + @kernel function ker_2(m) + + j = @index(Global, Linear) + + dat = view(m, :, j) + prefilter!(dat) + + end + + + backend = get_backend(data) + + if backend==CPU() + # Not clear how to set number of threads here + fun_1 = ker_1(backend, 8) + fun_2 = ker_2(backend, 8) + else + fun_1 = ker_1(backend) + fun_2 = ker_2(backend) + end + + I,J = size(data) + + fun_1(data, ndrange=(I,)) + fun_2(data, ndrange=(J,)) + +end + +using oneAPI: oneArray + +prefilter!(data::oneArray) = prefilter!(data, Val(:KA)) \ No newline at end of file diff --git a/src/splines/splines.jl b/src/splines/splines.jl index deb4fec1..45620f29 100644 --- a/src/splines/splines.jl +++ b/src/splines/splines.jl @@ -95,7 +95,7 @@ end fill!(spl.θ, zero(eltype(spl.θ))) ind = tuple( (2:(e[3]+1) for e in spl.grid )...) spl.θ[ind...] .= rhs - splines.prefilter!(spl.θ) + prefilter!(spl.θ, Val(:KA)) elseif k==1 spl.θ .= rhs end From 4a6d0ba1788e35e0f22192fbf2511d6e96e6bb18 Mon Sep 17 00:00:00 2001 From: Pablo Winant Date: Sun, 2 Jun 2024 01:09:18 +0200 Subject: [PATCH 11/17] WIP --- misc/dev_float32.jl | 30 ++++++++++++++++++++++-------- src/adapt.jl | 8 ++++---- 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/misc/dev_float32.jl b/misc/dev_float32.jl index 719405f2..28ad67dc 100644 --- a/misc/dev_float32.jl +++ b/misc/dev_float32.jl @@ -6,26 +6,40 @@ model_32 = include("$(root_dir)/misc/rbc_float32.jl") model32 = Dolo.convert_precision(Float32, model_32) -dm32 = Dolo.discretize(model32, Dict(:endo=>[1000]) ) +dm32 = Dolo.discretize(model32, Dict(:endo=>[100000]) ) typeof(dm32) using Adapt -import oneAPI: oneArray +# import oneAPI: oneArray +import CUDA: CuArray +# import Cu import Adapt: adapt_structure -# import CUDA: CuArray import Dolo +gpuArray = CuArray +interp_mode = :linear -wk0 = Dolo.time_iteration_workspace(dm32); +wk0 = Dolo.time_iteration_workspace(dm32; interp_mode=interp_mode); +wk1 = Dolo.time_iteration_workspace(dm32; interp_mode=interp_mode); +wk_gpu = Dolo.time_iteration_workspace(dm32, dest=gpuArray; interp_mode=interp_mode); -# wk = Dolo.time_iteration_workspace(dm32, dest=CuArray) -@time wk = Dolo.time_iteration_workspace(dm32, dest=oneArray; interp_mode=:cubic); +@time sol1 = time_iteration(dm32, wk0; engine=:nothing, tol_η=1e-5, verbose=true, improve_wait=0, improve=false); +@time sol3 = time_iteration(dm32, wk1; engine=:cpu, tol_η=1e-5, verbose=true, improve_wait=0, improve=false); +@time sol2 = time_iteration(dm32, wk_gpu; engine=:gpu, tol_η=1e-5, verbose=true, improve_wait=0, improve=false); -time_iteration(dm32, wk0; engine=:gpu, tol_η=1e-5, improve_wait=3, improve=true) -time_iteration(dm32, wk; engine=:gpu, tol_η=1e-5, improve_wait=3, improve=true) + + +wk0 = Dolo.time_iteration_workspace(dm32; interp_mode=:cubic); +wk1 = Dolo.time_iteration_workspace(dm32; interp_mode=:cubic); +wk_gpu = Dolo.time_iteration_workspace(dm32, dest=gpuArray; interp_mode=:cubic); + +@time sol1 = time_iteration(dm32, wk0; engine=:nothing, tol_η=1e-5, verbose=true, improve_wait=10, improve_K=100,improve=true); +@time sol3 = time_iteration(dm32, wk1; engine=:cpu, tol_η=1e-5, verbose=true, improve_wait=10, improve_K=100, improve=true); +# that one stops early +@time sol2 = time_iteration(dm32, wk_gpu; engine=:gpu, tol_η=1e-5, verbose=true, improve_wait=10, improve_K=100, improve=true); diff --git a/src/adapt.jl b/src/adapt.jl index 90702d08..c778e955 100644 --- a/src/adapt.jl +++ b/src/adapt.jl @@ -35,10 +35,10 @@ function adapt_structure(to, L::LL{G,D,F}) where G where D where F LL(L.grid, adapt(to, L.D), adapt(to, L.φ)) end - +maxabs(u::Number, v::Number) = abs(max(u,v)) # should it be merged with the general definition? -# import CUDA: CuArray -# distance(x::GVector{G, A}, y::GVector{G,A}) where G where A<:CuArray = Base.mapreduce(u->maximum(u), max, x.data-y.data) +import CUDA: CuArray +distance(x::GVector{G, A}, y::GVector{G,A}) where G where A<:CuArray = Base.mapreduce(u->maximum(abs.(u)), max, x.data-y.data) import oneAPI: oneArray -distance(x::GVector{G, A}, y::GVector{G,A}) where G where A<:oneArray = Base.mapreduce(u->maximum(u), max, x.data-y.data) \ No newline at end of file +distance(x::GVector{G, A}, y::GVector{G,A}) where G where A<:oneArray = Base.mapreduce(u->maximum(abs.(u)), max, x.data-y.data) \ No newline at end of file From a3cc8e28c5218ee3a1bcf15e9d733864cafa134c Mon Sep 17 00:00:00 2001 From: Pablo Winant Date: Wed, 5 Jun 2024 00:09:17 +0200 Subject: [PATCH 12/17] Helper function for 32bits conversion. --- examples/ymodels/rbc_mc.jl | 13 +++--- misc/dev_conversion.jl | 92 +++++++++++++++++++++++++++++++++++++ src/algos/simul.jl | 1 - src/algos/time_iteration.jl | 7 +-- src/conversion.jl | 2 +- src/grids.jl | 8 ++++ 6 files changed, 111 insertions(+), 12 deletions(-) create mode 100644 misc/dev_conversion.jl diff --git a/examples/ymodels/rbc_mc.jl b/examples/ymodels/rbc_mc.jl index c5ec190e..54b54637 100644 --- a/examples/ymodels/rbc_mc.jl +++ b/examples/ymodels/rbc_mc.jl @@ -8,8 +8,8 @@ model = let # calibrate some parameters β = 0.99 - σ = 5 - η = 1 + σ = 5.0 + η = 1.0 δ = 0.025 α = 0.33 ρ = 0.8 @@ -55,7 +55,8 @@ model = let end -function Dolo.transition(model::typeof(model), s::NamedTuple, x::NamedTuple, M::NamedTuple) + +function Dolo.transition(model::hypeof(model), s::NamedTuple, x::NamedTuple, M::NamedTuple) (;δ, ρ) = model.calibration @@ -68,7 +69,7 @@ end -function intermediate(model::typeof(model),s::NamedTuple, x::NamedTuple) +function intermediate(model::hypeof(model), s::NamedTuple, x::NamedTuple) p = model.calibration @@ -81,7 +82,7 @@ function intermediate(model::typeof(model),s::NamedTuple, x::NamedTuple) end -function arbitrage(model::typeof(model), s::NamedTuple, x::NamedTuple, S::NamedTuple, X::NamedTuple) +function arbitrage(model::hypeof(model), s::NamedTuple, x::NamedTuple, S::NamedTuple, X::NamedTuple) p = model.calibration @@ -95,6 +96,4 @@ function arbitrage(model::typeof(model), s::NamedTuple, x::NamedTuple, S::NamedT end - - model \ No newline at end of file diff --git a/misc/dev_conversion.jl b/misc/dev_conversion.jl new file mode 100644 index 00000000..7502e384 --- /dev/null +++ b/misc/dev_conversion.jl @@ -0,0 +1,92 @@ +using Dolo +using Dolo: transition +function hypeof(model) + typ = typeof(model) + otyp = eltype(model) + ntyp = Float32 + s = replace(string(typ),string(otyp)=>"Float32") + gtyp = ( Meta.parse(s) ) + return Union{eval(gtyp), typ} +end + +# hypeof(model) +root_dir = pkgdir(Dolo) + +# macro mdef(funtree) +# println(funtree) +# println(dump(funtree)) +# println("\nDONE\n") +# return funtree +# end + +model = include("$(root_dir)/examples/ymodels/rbc_mc.jl") + + +# macro mdef(funtree) +# return funtree +# end + +# @mdef function fun(a) +# a = (;x=1,y=2) +# (;x, y) = a +# return (x*y) +# end + + + +model32 = Dolo.convert_precision(Float32, model) + + + +s_ = Dolo.calibrated(model32, :states) +x = Dolo.calibrated(model32, :controls) +m_ = Dolo.calibrated(model32, :exogenous) + +s = [m_;s_] + +dm32 = Dolo.discretize(model32) +Dolo.time_iteration(dm32, verbose=true) + +Dolo.time_iteration(model32) + +Dolo.transition(model32, s,x,m_) + + + +function Dolo.transition(model::typeof(model), s::NamedTuple, x::NamedTuple, M::NamedTuple) + + (;δ, ρ) = model.calibration + + # Z = e.Z + K = s.k * (1-δ) + x.i + + (;k=K,) ## This is only the endogenous state + +end + + +model_32 = Dolo.convert_precision(Float32, model) +# inherit all methods +transition(m::typeof(model_32), args...) = transition(model, args...) +arbitrage(m::typeof(model_32), args...) = arbitrage(model, args...) + +Dolo.time_iteration(model_32) + + +dm32 = Dolo.discretize(model_32, Dict(:endo=>[1000]) ) + +typeof(dm32) + +using Adapt +import oneAPI: oneArray + +gpuArray = oneArray +interp_mode = :linear + +wk0 = Dolo.time_iteration_workspace(dm32; interp_mode=interp_mode); +wk1 = Dolo.time_iteration_workspace(dm32; interp_mode=interp_mode); +wk_gpu = Dolo.time_iteration_workspace(dm32, dest=gpuArray; interp_mode=interp_mode); + +@time sol1 = time_iteration(dm32, wk0; engine=:nothing, tol_η=1e-5, verbose=true, improve_wait=0, improve=false); +@time sol3 = time_iteration(dm32, wk1; engine=:cpu, tol_η=1e-5, verbose=true, improve_wait=0, improve=false); +@time sol2 = time_iteration(dm32, wk_gpu; engine=:gpu, tol_η=1e-5, verbose=true, improve_wait=0, improve=false); diff --git a/src/algos/simul.jl b/src/algos/simul.jl index d31bc495..8442be56 100644 --- a/src/algos/simul.jl +++ b/src/algos/simul.jl @@ -11,7 +11,6 @@ function τ(dmodel::Dolo.DYModel{M}, ss::T, a::SVector) where M<:Union{Dolo.YMod (i,_) = ss.loc s_ = ss.val - # TODO: replace following block by one nonallocating function Q = dmodel.dproc.Q diff --git a/src/algos/time_iteration.jl b/src/algos/time_iteration.jl index efce75fe..6e226284 100644 --- a/src/algos/time_iteration.jl +++ b/src/algos/time_iteration.jl @@ -22,18 +22,19 @@ function F(dmodel::M, s::QP, x::SVector{d,T}, φ::Union{Policy, GArray, DFun}) where M where d where T r = zero(SVector{d,T}) + for (w,S) in τ(dmodel, s, x) r += w*arbitrage(dmodel,s,x,S,φ(S)) end + # TODO: why does the following allocate ? # strange: if reloaded it doesn't allocate anymore # r += sum( # w*arbitrage(model,s,x,S,φ(S)) # for (w,S) in τ(model, s, x) # ) - r - # r::SVector{d,T} - # r = complementarities(dmodel.model, s,x,r) + r::SVector{d,T} + r = complementarities(dmodel.model, s,x,r) r end diff --git a/src/conversion.jl b/src/conversion.jl index 10fea021..73e7429d 100644 --- a/src/conversion.jl +++ b/src/conversion.jl @@ -18,7 +18,7 @@ function convert_precision(T, model::Dolo.YModel) Dolo.YModel( - names, + Dolo.name(model), states, controls, exogenous, diff --git a/src/grids.jl b/src/grids.jl index 0200e742..b4829e66 100644 --- a/src/grids.jl +++ b/src/grids.jl @@ -108,6 +108,14 @@ getindex(g::PGrid{G1, G2, d}, ::Colon, i::Int64) where G1 where G2 where d = g.g return i + p*(j-1) end +@inline function getindex(g::PGrid{G1, G2, d}, z::Complex{Int64}) where G1<:SGrid{d1} where G2<:CGrid{d2} where d where d1 where d2 + # Tf = eltype(g) + # SVector{d,Tf}(g.grids[1][i]..., g.grids[2][j]...) + i,j = from_linear(g, z.re) + Dolo.QP((i,j), SVector(g.grids[1][i]..., g.grids[2][j]...)) +end + + @inline to__linear_index(g::PGrid, ind::Tuple{Int64, Int64}) = ind[1] + length(g.grids[1])*(ind[2]-1) From 51525f698264a7539dc6229da715f07f58b26c39 Mon Sep 17 00:00:00 2001 From: Pablo Winant Date: Wed, 5 Jun 2024 15:01:11 +0200 Subject: [PATCH 13/17] WIP --- Project.toml | 13 ++++++++----- examples/ymodels/rbc_mc.jl | 2 +- src/adapt.jl | 6 ------ src/algos/time_iteration.jl | 1 - src/conversion.jl | 10 ++++++++++ src/splines/cubic_prefilter.jl | 3 --- 6 files changed, 19 insertions(+), 16 deletions(-) diff --git a/Project.toml b/Project.toml index 462d832c..979dfcb2 100644 --- a/Project.toml +++ b/Project.toml @@ -3,19 +3,16 @@ uuid = "9d24351c-2990-5e1b-a277-04c4b809c898" version = "0.5.0-alpha" [deps] -AbstractDifferentiation = "c29ec348-61ec-40c8-8164-b8c60e9d9f3d" Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" AxisArrays = "39de3d68-74b9-583c-8d2d-e117c070f3a9" BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" BlockDiagonals = "0a1fb500-61f7-11e9-3c65-f5ef3456f9f0" -CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" Continuables = "79afa230-ca09-11e8-120b-5decf7bf5e25" Crayons = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f" DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" Dolang = "e5c7262c-e9d2-5620-ad8e-1af14eb8a8e3" -Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" Formatting = "59287772-0a20-5a39-b81b-1366585eb4c0" @@ -44,5 +41,11 @@ StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" StringDistances = "88034a9c-02f8-509d-84a9-84ec65e18404" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6" -Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" -oneAPI = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b" + +[extensions] +DolooneAPIExt = "oneAPI" +DoloCUDAExt = "CUDA" + +[weakdeps] +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" +oneAPI = "8f75cd03-7ff8-4ecb-9b8f-daf728133b1b" \ No newline at end of file diff --git a/examples/ymodels/rbc_mc.jl b/examples/ymodels/rbc_mc.jl index 54b54637..f16a2a52 100644 --- a/examples/ymodels/rbc_mc.jl +++ b/examples/ymodels/rbc_mc.jl @@ -1,6 +1,6 @@ using StaticArrays -import Dolo: transition, arbitrage, intermediate +import Dolo: transition, arbitrage, intermediate, hypeof model = let diff --git a/src/adapt.jl b/src/adapt.jl index c778e955..39873b6e 100644 --- a/src/adapt.jl +++ b/src/adapt.jl @@ -36,9 +36,3 @@ function adapt_structure(to, L::LL{G,D,F}) where G where D where F end maxabs(u::Number, v::Number) = abs(max(u,v)) -# should it be merged with the general definition? -import CUDA: CuArray -distance(x::GVector{G, A}, y::GVector{G,A}) where G where A<:CuArray = Base.mapreduce(u->maximum(abs.(u)), max, x.data-y.data) - -import oneAPI: oneArray -distance(x::GVector{G, A}, y::GVector{G,A}) where G where A<:oneArray = Base.mapreduce(u->maximum(abs.(u)), max, x.data-y.data) \ No newline at end of file diff --git a/src/algos/time_iteration.jl b/src/algos/time_iteration.jl index 6e226284..c9b361a9 100644 --- a/src/algos/time_iteration.jl +++ b/src/algos/time_iteration.jl @@ -176,7 +176,6 @@ function time_iteration(model::YModel; kwargs...) time_iteration(dmodel, wksp; kwargs2...) end -using oneAPI: oneArray function time_iteration(model::DYModel, workspace=time_iteration_workspace(model); T=500, diff --git a/src/conversion.jl b/src/conversion.jl index 73e7429d..b4de4d41 100644 --- a/src/conversion.jl +++ b/src/conversion.jl @@ -37,3 +37,13 @@ end function convert_precision(Tnew, ps::Dolo.ProductSpace) Dolo.ProductSpace((convert_precision(Tnew, e) for e in ps.spaces)...) end + + +function hypeof(model) + typ = typeof(model) + otyp = eltype(model) + ntyp = Float32 + s = replace(string(typ),string(otyp)=>"Float32") + gtyp = ( Meta.parse(s) ) + return Union{eval(gtyp), typ} +end \ No newline at end of file diff --git a/src/splines/cubic_prefilter.jl b/src/splines/cubic_prefilter.jl index e6ff2f74..f5988140 100644 --- a/src/splines/cubic_prefilter.jl +++ b/src/splines/cubic_prefilter.jl @@ -313,6 +313,3 @@ function prefilter!(data::AbstractArray{T,2}, ::Val{:KA}) where T end -using oneAPI: oneArray - -prefilter!(data::oneArray) = prefilter!(data, Val(:KA)) \ No newline at end of file From b8f8f2cef19c0c94761790521d17303dc03062a2 Mon Sep 17 00:00:00 2001 From: Pablo Winant Date: Wed, 5 Jun 2024 15:02:29 +0200 Subject: [PATCH 14/17] Updated CI matrix. --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8bd006b9..3d7921af 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,7 +9,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - julia-version: ['1.7','1.8'] + julia-version: ['1.9','1.10'] julia-arch: [x64] os: [ubuntu-latest, windows-latest, macOS-latest] From 7d60c36ef1464cdc81ba4d249334c2eeca292422 Mon Sep 17 00:00:00 2001 From: Pablo Winant Date: Wed, 5 Jun 2024 18:30:50 +0200 Subject: [PATCH 15/17] WIP --- src/splines/cubic_prefilter.jl | 89 ++++++++++++++++++++++++++++--- src/splines/interp.jl | 38 +++++++++++--- src/splines/macros.jl | 95 ++++++++++++++++++++++++---------- test/test_interpolation.jl | 2 +- 4 files changed, 181 insertions(+), 43 deletions(-) diff --git a/src/splines/cubic_prefilter.jl b/src/splines/cubic_prefilter.jl index f5988140..7f7c9b7a 100644 --- a/src/splines/cubic_prefilter.jl +++ b/src/splines/cubic_prefilter.jl @@ -75,17 +75,29 @@ end # TODO: there are certainly some cases where we don't want MVectors there -# function prefilter!(data::AbstractVector{T}) where T +function prefilter!(data::AbstractVector{T}) where T -# N = length(data) + N = length(data) -# bands = zero(MMatrix{N, 3, Float64, N*3}) -# bb = zero(MVector{N, T}) + bands = zero(MMatrix{N, 3, Float64, N*3}) + bb = zero(MVector{N, T}) -# fill_bands!(N-2, bands, bb, data) -# solve_coefficients!(bands, bb, data) + fill_bands!(N-2, bands, bb, data) + solve_coefficients!(bands, bb, data) -# end +end + +function prefilter_1!(data::AbstractVector{T}) where T + + N = length(data) + + bands = zero(MMatrix{N, 3, Float64, N*3}) + bb = zero(MVector{N, T}) + + fill_bands!(N-2, bands, bb, data) + solve_coefficients!(bands, bb, data) + +end const zq_0 = sqrt(3.0)-2.0 const λ_0 = 6.0 @@ -94,10 +106,11 @@ getprecision(g::AbstractArray{T}) where T<:Number = T getprecision(g::AbstractArray{T}) where T<:SVector = eltype(g).types[1].types[1] -function prefilter!(c::AbstractVector{T}) where T +function prefilter_2!(c::AbstractVector{T}) where T prec = getprecision(c) + λ = convert(prec, λ_0) zq = convert(prec, zq_0) @@ -313,3 +326,63 @@ function prefilter!(data::AbstractArray{T,2}, ::Val{:KA}) where T end + +function prefilter!(data::AbstractArray{T,3}, ::Val{:KA}) where T + + + @kernel function ker_1(m) + + c = @index(Global, Cartesian) + i = c.I[1] + j = c.I[2] + + dat = view(m, i, j, :) + prefilter!(dat) + + end + + @kernel function ker_2(m) + + c = @index(Global, Cartesian) + j = c.I[1] + k = c.I[2] + + dat = view(m, :, j, k) + prefilter!(dat) + + end + + @kernel function ker_3(m) + + + c = @index(Global, Cartesian) + i = c.I[1] + k = c.I[2] + + dat = view(m, i, :, k) + prefilter!(dat) + + end + + + backend = get_backend(data) + + if backend==CPU() + # Not clear how to set number of threads here + fun_1 = ker_1(backend, 8) + fun_2 = ker_2(backend, 8) + fun_3 = ker_3(backend, 8) + else + fun_1 = ker_1(backend) + fun_2 = ker_2(backend) + fun_3 = ker_3(backend) + end + + I,J,K = size(data) + + fun_1(data, ndrange=(I,J)) + fun_2(data, ndrange=(J,K)) + fun_3(data, ndrange=(I,K)) + +end + diff --git a/src/splines/interp.jl b/src/splines/interp.jl index 725b38ce..c26356e0 100644 --- a/src/splines/interp.jl +++ b/src/splines/interp.jl @@ -16,20 +16,42 @@ end @inline function reduce_tensors(λ::SVector{2,U}, M::SArray{T,V,2,W}) where T where U where V where W v1 = SVector(1-λ[1],λ[1]) v2 = SVector(1-λ[2],λ[2]) - return M[1,1]*v1[1]*v2[1] + M[1,2]*v1[1]*v2[2] + M[2,1]*v1[2]*v2[1] + M[2,2]*v1[2]*v2[2] + return M[1,1]*v1[1]*v2[1] + + M[1,2]*v1[1]*v2[2] + + M[2,1]*v1[2]*v2[1] + + M[2,2]*v1[2]*v2[2] + + # vv = tuple( (SVector(1-e, e) for e in λ)...) + # xx = SVector( (prod(e) for e in Iterators.product(vv...))...) + # return sum( xx .* v[:]) +end + +@inline function reduce_tensors(λ::SVector{3,U}, M::SArray{T,V,3,W}) where T where U where V where W + v1 = SVector(1-λ[1],λ[1]) + v2 = SVector(1-λ[2],λ[2]) + v3 = SVector(1-λ[3],λ[3]) + res = M[1,1,1]*v1[1]*v2[1]*v3[1] + + M[2,1,1]*v1[2]*v2[1]*v3[1] + + M[1,2,1]*v1[1]*v2[2]*v3[1] + + M[2,2,1]*v1[2]*v2[2]*v3[1] + + M[1,1,2]*v1[1]*v2[1]*v3[2] + + M[2,1,2]*v1[2]*v2[1]*v3[2] + + M[1,2,2]*v1[1]*v2[2]*v3[2] + + M[2,2,2]*v1[2]*v2[2]*v3[2] + return res # vv = tuple( (SVector(1-e, e) for e in λ)...) # xx = SVector( (prod(e) for e in Iterators.product(vv...))...) # return sum( xx .* v[:]) end matextract(v::AbstractArray{T,3}, i,j,k) where T = SArray{Tuple{2, 2, 2}, T, 3, 8}( - v[i, j ,k], - v[i+1,j ,k], - v[i ,j+1,k], - v[i+1,j+1,k], - v[i ,j ,k+1], - v[i+1,j ,k+1], - v[i ,j+1,k+1], + v[ i, j,k ], + v[i+1, j,k ], + v[ i,j+1,k ], + v[i+1,j+1,k ], + v[ i, j,k+1], + v[i+1, j,k+1], + v[ i,j+1,k+1], v[i+1,j+1,k+1] ) diff --git a/src/splines/macros.jl b/src/splines/macros.jl index 5e354d23..28065ccf 100644 --- a/src/splines/macros.jl +++ b/src/splines/macros.jl @@ -156,22 +156,21 @@ function create_parameters(d; Tf=Float64) return lines end + function create_local_parameters(d; vectorize=true, Tf=Float64) lines = [] if vectorize for i=1:d - bl = quote $(U("x",i)) = S[n][$i] $(U("u",i)) = ($(U("x",i)) - $(U("start",i)))*$(U("dinv",i)) - # $(U("i",i)) = (floor(Int,$(U("u",i)) )) - $(U("i",i)) = unsafe_trunc(Int, $(U("u",i))) + $(U("i",i)) = (floor(Int,$(U("u",i)) )) $(U("i",i)) = max( min($(U("i",i)),$(U("M",i))-2), 0 ) $(U("t",i)) = $(U("u",i))-$(U("i",i)) $(U("tp",i,1)) = $(U("t",i))*$(U("t",i))*$(U("t",i)) $(U("tp",i,2)) = $(U("t",i))*$(U("t",i)) $(U("tp",i,3)) = $(U("t",i)) - $(U("tp",i,4)) = convert($Tf,1.0); #TODO + $(U("tp",i,4)) = 1.0; end for ll in bl.args push!(lines, ll) @@ -179,33 +178,17 @@ function create_local_parameters(d; vectorize=true, Tf=Float64) end else lines = [] - e = quote - x_ = SVector(S...) - a_ = SVector( a... ) - b_ = SVector( b... ) - n_ = SVector( orders... ) - δ_ = (b_.-a_)./(n_.-1) - i_ = div.( (x_.-a_), δ_) - i_ = max.(min.(i_, n_.-2), 0) - λ_ = (x_.-(a_ .+ δ_.*i_))./δ_ - ii_ = unsafe_trunc.(Int,i_) - - end - for x in e.args - push!(lines, x) - end for i=1:d bl = quote - - - # compat - $(U("i",i)) = ii_[$i] - $(U("t",i)) = λ_[$i] - + $(U("x",i)) = S[$i] + $(U("u",i)) = ($(U("x",i)) - $(U("start",i)))*$(U("dinv",i)) + $(U("i",i)) = (floor(Int,$(U("u",i)) )) + $(U("i",i)) = max( min($(U("i",i)),$(U("M",i))-2), 0 ) + $(U("t",i)) = $(U("u",i))-$(U("i",i)) $(U("tp",i,1)) = $(U("t",i))*$(U("t",i))*$(U("t",i)) $(U("tp",i,2)) = $(U("t",i))*$(U("t",i)) $(U("tp",i,3)) = $(U("t",i)) - $(U("tp",i,4)) = convert($Tf,1.0); + $(U("tp",i,4)) = 1.0; end for ll in bl.args push!(lines, ll) @@ -215,6 +198,66 @@ function create_local_parameters(d; vectorize=true, Tf=Float64) return lines end +# function create_local_parameters(d; vectorize=true, Tf=Float64) +# lines = [] +# if vectorize +# for i=1:d + +# bl = quote +# $(U("x",i)) = S[n][$i] +# $(U("u",i)) = ($(U("x",i)) - $(U("start",i)))*$(U("dinv",i)) +# # $(U("i",i)) = (floor(Int,$(U("u",i)) )) +# $(U("i",i)) = unsafe_trunc(Int, $(U("u",i))) +# $(U("i",i)) = max( min($(U("i",i)),$(U("M",i))-2), 0 ) +# $(U("t",i)) = $(U("u",i))-$(U("i",i)) +# $(U("tp",i,1)) = $(U("t",i))*$(U("t",i))*$(U("t",i)) +# $(U("tp",i,2)) = $(U("t",i))*$(U("t",i)) +# $(U("tp",i,3)) = $(U("t",i)) +# $(U("tp",i,4)) = convert($Tf,1.0); #TODO +# end +# for ll in bl.args +# push!(lines, ll) +# end +# end +# else +# lines = [] +# e = quote +# x_ = SVector(S...) +# a_ = SVector( a... ) +# b_ = SVector( b... ) +# n_ = SVector( orders... ) +# δ_ = (b_.-a_)./(n_.-1) +# # i_ = div.( (x_.-a_), δ_) +# i_ = SVector( ()...) +# i_ = max.(min.(i_, n_.-2), 0) +# λ_ = (x_.-(a_ .+ δ_.*i_))./δ_ +# ii_ = unsafe_trunc.(Int,i_) + +# end +# for x in e.args +# push!(lines, x) +# end +# for i=1:d +# bl = quote + + +# # compat +# $(U("i",i)) = ii_[$i] +# $(U("t",i)) = λ_[$i] + +# $(U("tp",i,1)) = $(U("t",i))*$(U("t",i))*$(U("t",i)) +# $(U("tp",i,2)) = $(U("t",i))*$(U("t",i)) +# $(U("tp",i,3)) = $(U("t",i)) +# $(U("tp",i,4)) = convert($Tf,1.0); +# end +# for ll in bl.args +# push!(lines, ll) +# end +# end +# end +# return lines +# end + function create_function(d,extrap="natural"; vectorize=true, Tf=Float32) if vectorize expr = quote diff --git a/test/test_interpolation.jl b/test/test_interpolation.jl index 4ab3bd59..bf94c3dd 100644 --- a/test/test_interpolation.jl +++ b/test/test_interpolation.jl @@ -12,7 +12,7 @@ using StaticArrays NN = 20 vars = tuple( (Symbol("d$i") for i in 1:d)... ) - pairs = (vars[i]=>[0.0, 1.0*i] for i in 1:d) + pairs = (vars[i]=>(0.0, 1.0*i) for i in 1:d) dvars = Dict( pairs ) cs = Dolo.CSpace(; pairs... From 8b4c56dd470f41801bee80ed0498f8552d0a319b Mon Sep 17 00:00:00 2001 From: Pablo Winant Date: Wed, 5 Jun 2024 18:39:57 +0200 Subject: [PATCH 16/17] Added extension. --- ext/DoloCUDAExt.jl | 12 ++++++++++++ ext/DolooneAPIExt.jl | 16 ++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 ext/DoloCUDAExt.jl create mode 100644 ext/DolooneAPIExt.jl diff --git a/ext/DoloCUDAExt.jl b/ext/DoloCUDAExt.jl new file mode 100644 index 00000000..3624089c --- /dev/null +++ b/ext/DoloCUDAExt.jl @@ -0,0 +1,12 @@ +module DoloCUDAExt + + using CUDA + using Dolo +# should it be merged with the general definition? + + import CUDA: CuArray + + Dolo.distance(x::GVector{G, A}, y::GVector{G,A}) where G where A<:CuArray = Base.mapreduce(u->maximum(abs.(u)), max, x.data-y.data) + + +end diff --git a/ext/DolooneAPIExt.jl b/ext/DolooneAPIExt.jl new file mode 100644 index 00000000..95c2cd36 --- /dev/null +++ b/ext/DolooneAPIExt.jl @@ -0,0 +1,16 @@ +module DolooneAPIExt + + println("Loading oneAPI") + using oneAPI + import oneAPI: oneArray + + using Dolo: distance + + Dolo.distance(x::GVector{G, A}, y::GVector{G,A}) where G where A<:oneArray = Base.mapreduce(u->maximum(abs.(u)), max, x.data-y.data) + + import Dolo.splines: prefilter! + + prefilter!(data::oneArray) = prefilter!(data, Val(:KA)) + + +end From 663d857991041662bab06ed55b3eb4cecd784538 Mon Sep 17 00:00:00 2001 From: Pablo Winant Date: Wed, 5 Jun 2024 19:16:28 +0200 Subject: [PATCH 17/17] All tests pass. --- src/dolo_model.jl | 4 +- src/garray.jl | 2 +- src/space.jl | 3 +- src/splines/macros.jl | 96 ++++++++++++------------------------------- 4 files changed, 31 insertions(+), 74 deletions(-) diff --git a/src/dolo_model.jl b/src/dolo_model.jl index bd76af4a..6394e776 100644 --- a/src/dolo_model.jl +++ b/src/dolo_model.jl @@ -80,7 +80,9 @@ function discretize(model::YModel{<:VAR1}, d=Dict()) n_s = length(Dolo.variables(model.states)) - d exo_grid = SGrid(dvar.Q) - endo_space = CartesianSpace{n_s, Dolo.variables(model.states)[d+1:end]}( + # TODO: simplify that call + Tf = eltype(model) + endo_space = CartesianSpace{n_s, Dolo.variables(model.states)[d+1:end],Tf}( model.states.min[d+1:end], model.states.max[d+1:end] ) diff --git a/src/garray.jl b/src/garray.jl index 66074419..cca063ea 100644 --- a/src/garray.jl +++ b/src/garray.jl @@ -63,7 +63,7 @@ getindex(g::GArray{G,T}, i::Int64) where G where T = g.data[i] setindex!(g::GArray, x, i) = (g.data[i] = x) function setindex!( - g::GArray{Dolo.CGrid{2},Vector{T}}, + g::GArray{<:Dolo.CGrid{2},Vector{T}}, v::T, i::Int64, j::Int64) where T diff --git a/src/space.jl b/src/space.jl index 33a0332d..34666da3 100644 --- a/src/space.jl +++ b/src/space.jl @@ -8,8 +8,7 @@ end const CSpace = CartesianSpace -CartesianSpace(a::Tuple{Tf}, b::Tuple{Tf}) where Tf = CartesianSpace{length(a), (:x,), Tf}(a,b) -CartesianSpace(a::Tuple{Tf, Tf}, b::Tuple{Tf, Tf}) where Tf = CartesianSpace{length(a), (:x_1, :x_2), Tf}(a,b) +# CartesianSpace(a::NTuple{d,Tf}, b::NTuple{d, Tf}) where d where Tf = CartesianSpace{length(a), (:x,), Tf}(a,b) function CartesianSpace(kwargs::Pair{Symbol, Tuple{Tf, Tf}}...) where Tf diff --git a/src/splines/macros.jl b/src/splines/macros.jl index 28065ccf..c989ff82 100644 --- a/src/splines/macros.jl +++ b/src/splines/macros.jl @@ -109,7 +109,7 @@ function create_Phi(d, extrap, diff; Tf=Float64) return lines end -function tensor_prod(symbs, inds, add_index=false) +function tensor_prod(symbs, inds, add_index=false; Tf=Float64) if length(symbs)==0 subscripts = [:($(U("i",i))+$(inds[i])) for i=1:length(inds)] if add_index @@ -157,20 +157,23 @@ function create_parameters(d; Tf=Float64) end + function create_local_parameters(d; vectorize=true, Tf=Float64) lines = [] if vectorize for i=1:d + bl = quote $(U("x",i)) = S[n][$i] $(U("u",i)) = ($(U("x",i)) - $(U("start",i)))*$(U("dinv",i)) - $(U("i",i)) = (floor(Int,$(U("u",i)) )) + # $(U("i",i)) = (floor(Int,$(U("u",i)) )) + $(U("i",i)) = unsafe_trunc(Int, $(U("u",i))) $(U("i",i)) = max( min($(U("i",i)),$(U("M",i))-2), 0 ) $(U("t",i)) = $(U("u",i))-$(U("i",i)) $(U("tp",i,1)) = $(U("t",i))*$(U("t",i))*$(U("t",i)) $(U("tp",i,2)) = $(U("t",i))*$(U("t",i)) $(U("tp",i,3)) = $(U("t",i)) - $(U("tp",i,4)) = 1.0; + $(U("tp",i,4)) = convert($Tf,1.0); #TODO end for ll in bl.args push!(lines, ll) @@ -178,17 +181,30 @@ function create_local_parameters(d; vectorize=true, Tf=Float64) end else lines = [] + e = quote + x_ = SVector(S...) + a_ = SVector( a... ) + b_ = SVector( b... ) + n_ = SVector( orders... ) + δ_ = (b_.-a_)./(n_.-1) + i_ = div.( (x_.-a_), δ_) + i_ = max.(min.(i_, n_.-2), 0) + λ_ = (x_.-(a_ .+ δ_.*i_))./δ_ + ii_ = unsafe_trunc.(Int,i_) + end + for x in e.args + push!(lines, x) + end for i=1:d bl = quote - $(U("x",i)) = S[$i] - $(U("u",i)) = ($(U("x",i)) - $(U("start",i)))*$(U("dinv",i)) - $(U("i",i)) = (floor(Int,$(U("u",i)) )) - $(U("i",i)) = max( min($(U("i",i)),$(U("M",i))-2), 0 ) - $(U("t",i)) = $(U("u",i))-$(U("i",i)) + # compat + $(U("i",i)) = ii_[$i] + $(U("t",i)) = λ_[$i] + $(U("tp",i,1)) = $(U("t",i))*$(U("t",i))*$(U("t",i)) $(U("tp",i,2)) = $(U("t",i))*$(U("t",i)) $(U("tp",i,3)) = $(U("t",i)) - $(U("tp",i,4)) = 1.0; + $(U("tp",i,4)) = convert($Tf,1.0); end for ll in bl.args push!(lines, ll) @@ -198,67 +214,7 @@ function create_local_parameters(d; vectorize=true, Tf=Float64) return lines end -# function create_local_parameters(d; vectorize=true, Tf=Float64) -# lines = [] -# if vectorize -# for i=1:d - -# bl = quote -# $(U("x",i)) = S[n][$i] -# $(U("u",i)) = ($(U("x",i)) - $(U("start",i)))*$(U("dinv",i)) -# # $(U("i",i)) = (floor(Int,$(U("u",i)) )) -# $(U("i",i)) = unsafe_trunc(Int, $(U("u",i))) -# $(U("i",i)) = max( min($(U("i",i)),$(U("M",i))-2), 0 ) -# $(U("t",i)) = $(U("u",i))-$(U("i",i)) -# $(U("tp",i,1)) = $(U("t",i))*$(U("t",i))*$(U("t",i)) -# $(U("tp",i,2)) = $(U("t",i))*$(U("t",i)) -# $(U("tp",i,3)) = $(U("t",i)) -# $(U("tp",i,4)) = convert($Tf,1.0); #TODO -# end -# for ll in bl.args -# push!(lines, ll) -# end -# end -# else -# lines = [] -# e = quote -# x_ = SVector(S...) -# a_ = SVector( a... ) -# b_ = SVector( b... ) -# n_ = SVector( orders... ) -# δ_ = (b_.-a_)./(n_.-1) -# # i_ = div.( (x_.-a_), δ_) -# i_ = SVector( ()...) -# i_ = max.(min.(i_, n_.-2), 0) -# λ_ = (x_.-(a_ .+ δ_.*i_))./δ_ -# ii_ = unsafe_trunc.(Int,i_) - -# end -# for x in e.args -# push!(lines, x) -# end -# for i=1:d -# bl = quote - - -# # compat -# $(U("i",i)) = ii_[$i] -# $(U("t",i)) = λ_[$i] - -# $(U("tp",i,1)) = $(U("t",i))*$(U("t",i))*$(U("t",i)) -# $(U("tp",i,2)) = $(U("t",i))*$(U("t",i)) -# $(U("tp",i,3)) = $(U("t",i)) -# $(U("tp",i,4)) = convert($Tf,1.0); -# end -# for ll in bl.args -# push!(lines, ll) -# end -# end -# end -# return lines -# end - -function create_function(d,extrap="natural"; vectorize=true, Tf=Float32) +function create_function(d,extrap="natural"; vectorize=true, Tf=Float64) if vectorize expr = quote function $(Symbol(string("eval_UC_spline!")))( a, b, orders, C::Array{T,d}, S::Vector{SVector{d,$Tf}}, V::Vector{T}) where d where T