@@ -14,13 +14,6 @@ function __kernel(expr, generate_cpu=true, force_inbounds=false)
14
14
def = splitdef (expr)
15
15
name = def[:name ]
16
16
args = def[:args ]
17
- if force_inbounds
18
- body_qt = quote
19
- @inbounds $ (def[:body ])
20
- end
21
- def[:body ] = body_qt
22
- end
23
-
24
17
find_return (expr) && error (" Return statement not permitted in a kernel function $name " )
25
18
26
19
constargs = Array {Bool} (undef, length (args))
@@ -45,13 +38,13 @@ function __kernel(expr, generate_cpu=true, force_inbounds=false)
45
38
if generate_cpu
46
39
def_cpu = deepcopy (def)
47
40
def_cpu[:name ] = cpu_name
48
- transform_cpu! (def_cpu, constargs)
41
+ transform_cpu! (def_cpu, constargs, force_inbounds )
49
42
cpu_function = combinedef (def_cpu)
50
43
end
51
44
52
45
def_gpu = deepcopy (def)
53
46
def_gpu[:name ] = gpu_name = Symbol (:gpu_ , name)
54
- transform_gpu! (def_gpu, constargs)
47
+ transform_gpu! (def_gpu, constargs, force_inbounds )
55
48
gpu_function = combinedef (def_gpu)
56
49
57
50
# create constructor functions
83
76
84
77
# The easy case, transform the function for GPU execution
85
78
# - mark constant arguments by applying `constify`.
86
- function transform_gpu! (def, constargs)
79
+ function transform_gpu! (def, constargs, force_inbounds )
87
80
let_constargs = Expr[]
88
81
for (i, arg) in enumerate (def[:args ])
89
82
if constargs[i]
90
83
push! (let_constargs, :($ arg = $ constify ($ arg)))
91
84
end
92
85
end
93
86
pushfirst! (def[:args ], :__ctx__ )
87
+ body = def[:body ]
88
+ if force_inbounds
89
+ body = quote
90
+ @inbounds $ (body)
91
+ end
92
+ end
94
93
body = quote
95
94
if $ __validindex (__ctx__)
96
- $ (def[ : body] )
95
+ $ (body)
97
96
end
98
97
return nothing
99
98
end
110
109
# - handle indices
111
110
# - hoist workgroup definitions
112
111
# - hoist uniform variables
113
- function transform_cpu! (def, constargs)
112
+ function transform_cpu! (def, constargs, force_inbounds )
114
113
let_constargs = Expr[]
115
114
for (i, arg) in enumerate (def[:args ])
116
115
if constargs[i]
@@ -121,7 +120,13 @@ function transform_cpu!(def, constargs)
121
120
new_stmts = Expr[]
122
121
body = MacroTools. flatten (def[:body ])
123
122
push! (new_stmts, Expr (:aliasscope ))
123
+ if force_inbounds
124
+ push! (new_stmts, Expr (:inbounds , true ))
125
+ end
124
126
append! (new_stmts, split (body. args))
127
+ if force_inbounds
128
+ push! (new_stmts, Expr (:inbounds , :pop ))
129
+ end
125
130
push! (new_stmts, Expr (:popaliasscope ))
126
131
push! (new_stmts, :(return nothing ))
127
132
def[:body ] = Expr (:let ,
0 commit comments