Skip to content

Commit 3c082e6

Browse files
committed
Fix inbounds codegen for CPU
1 parent edd5371 commit 3c082e6

File tree

1 file changed

+17
-12
lines changed

1 file changed

+17
-12
lines changed

src/macros.jl

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,6 @@ function __kernel(expr, generate_cpu=true, force_inbounds=false)
1414
def = splitdef(expr)
1515
name = def[:name]
1616
args = def[:args]
17-
if force_inbounds
18-
body_qt = quote
19-
@inbounds $(def[:body])
20-
end
21-
def[:body] = body_qt
22-
end
23-
2417
find_return(expr) && error("Return statement not permitted in a kernel function $name")
2518

2619
constargs = Array{Bool}(undef, length(args))
@@ -45,13 +38,13 @@ function __kernel(expr, generate_cpu=true, force_inbounds=false)
4538
if generate_cpu
4639
def_cpu = deepcopy(def)
4740
def_cpu[:name] = cpu_name
48-
transform_cpu!(def_cpu, constargs)
41+
transform_cpu!(def_cpu, constargs, force_inbounds)
4942
cpu_function = combinedef(def_cpu)
5043
end
5144

5245
def_gpu = deepcopy(def)
5346
def_gpu[:name] = gpu_name = Symbol(:gpu_, name)
54-
transform_gpu!(def_gpu, constargs)
47+
transform_gpu!(def_gpu, constargs, force_inbounds)
5548
gpu_function = combinedef(def_gpu)
5649

5750
# create constructor functions
@@ -83,17 +76,23 @@ end
8376

8477
# The easy case, transform the function for GPU execution
8578
# - mark constant arguments by applying `constify`.
86-
function transform_gpu!(def, constargs)
79+
function transform_gpu!(def, constargs, force_inbounds)
8780
let_constargs = Expr[]
8881
for (i, arg) in enumerate(def[:args])
8982
if constargs[i]
9083
push!(let_constargs, :($arg = $constify($arg)))
9184
end
9285
end
9386
pushfirst!(def[:args], :__ctx__)
87+
body = def[:body]
88+
if force_inbounds
89+
body = quote
90+
@inbounds $(body)
91+
end
92+
end
9493
body = quote
9594
if $__validindex(__ctx__)
96-
$(def[:body])
95+
$(body)
9796
end
9897
return nothing
9998
end
@@ -110,7 +109,7 @@ end
110109
# - handle indices
111110
# - hoist workgroup definitions
112111
# - hoist uniform variables
113-
function transform_cpu!(def, constargs)
112+
function transform_cpu!(def, constargs, force_inbounds)
114113
let_constargs = Expr[]
115114
for (i, arg) in enumerate(def[:args])
116115
if constargs[i]
@@ -121,7 +120,13 @@ function transform_cpu!(def, constargs)
121120
new_stmts = Expr[]
122121
body = MacroTools.flatten(def[:body])
123122
push!(new_stmts, Expr(:aliasscope))
123+
if force_inbounds
124+
push!(new_stmts, Expr(:inbounds, true))
125+
end
124126
append!(new_stmts, split(body.args))
127+
if force_inbounds
128+
push!(new_stmts, Expr(:inbounds, :pop))
129+
end
125130
push!(new_stmts, Expr(:popaliasscope))
126131
push!(new_stmts, :(return nothing))
127132
def[:body] = Expr(:let,

0 commit comments

Comments
 (0)