Skip to content

Commit

Permalink
Merge pull request #41 from JuliaGPU/jps/llvm-stateless
Browse files (browse the repository at this point in the history)
Move to stateless LLVM contexts
  • Loading branch information
jpsamaroo authored Aug 21, 2020
2 parents 7c5ba2e + 5110ac9 commit e6b4d61
Show file tree
Hide file tree
Showing 15 changed files with 527 additions and 489 deletions.
10 changes: 4 additions & 6 deletions Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,21 +49,19 @@ version = "5.1.0"

[[GPUCompiler]]
deps = ["DataStructures", "InteractiveUtils", "LLVM", "Libdl", "TimerOutputs", "UUIDs"]
git-tree-sha1 = "f0fdbfcd2bcd380aa50c43bdab753780f248581e"
repo-rev = "master"
repo-url = "https://github.com/JuliaGPU/GPUCompiler.jl.git"
git-tree-sha1 = "10b1a3aa52de30e9219f3ed147cb09e72cf6d2e8"
uuid = "61eb1bfa-7361-4325-ad38-22787b887f55"
version = "0.6.0"
version = "0.7.0"

[[InteractiveUtils]]
deps = ["Markdown"]
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"

[[LLVM]]
deps = ["CEnum", "Libdl", "Printf", "Unicode"]
git-tree-sha1 = "a662366a5d485dee882077e8da3e1a95a86d097f"
git-tree-sha1 = "d57affa9580f5e9fb44260e8f9366dc977f01a60"
uuid = "929cbde3-209d-540e-8aea-75f648917ca0"
version = "2.0.0"
version = "3.0.0"

[[Libdl]]
uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
Expand Down
6 changes: 3 additions & 3 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "AMDGPU"
uuid = "21141c5a-9bdb-4563-92ae-f87d6854732e"
authors = ["Julian P Samaroo <[email protected]>"]
version = "0.1.1"
version = "0.1.2"

[deps]
AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c"
Expand All @@ -22,8 +22,8 @@ Adapt = "0.4, 1.0, 2.0"
BinaryProvider = "0.5"
CEnum = "0.2, 0.3, 0.4"
GPUArrays = "2, 3, 4, 5"
GPUCompiler = "0.4, 0.5, 0.6"
LLVM = "2"
GPUCompiler = "0.7"
LLVM = "3"
Requires = "1"
Setfield = "0.5, 0.6, 0.7"
julia = "1.4"
Expand Down
59 changes: 30 additions & 29 deletions src/device/gcn/assertion.jl
Original file line number Diff line number Diff line change
Expand Up @@ -37,38 +37,39 @@ end

assert_counter = 0

@generated function rocassert_fail(::Val{msg}, ::Val{file}, ::Val{line}) where
{msg, file, line}
T_void = LLVM.VoidType(JuliaContext())
T_int32 = LLVM.Int32Type(JuliaContext())
T_pint8 = LLVM.PointerType(LLVM.Int8Type(JuliaContext()))
@generated function rocassert_fail(::Val{msg}, ::Val{file}, ::Val{line}) where {msg, file, line}
JuliaContext() do ctx
T_void = LLVM.VoidType(ctx)
T_int32 = LLVM.Int32Type(ctx)
T_pint8 = LLVM.PointerType(LLVM.Int8Type(ctx))

# create function
llvm_f, _ = create_function()
mod = LLVM.parent(llvm_f)
# create function
llvm_f, _ = create_function()
mod = LLVM.parent(llvm_f)

# generate IR
Builder(JuliaContext()) do builder
entry = BasicBlock(llvm_f, "entry", JuliaContext())
position!(builder, entry)
global assert_counter
assert_counter += 1
message = globalstring_ptr!(builder, String(msg), "assert_message_$(assert_counter)")
file = globalstring_ptr!(builder, String(file), "assert_file_$(assert_counter)")
line = ConstantInt(T_int32, line)
func = globalstring_ptr!(builder, "unknown", "assert_function_$(assert_counter)")
charSize = ConstantInt(Csize_t(1), JuliaContext())
# generate IR
Builder(ctx) do builder
entry = BasicBlock(llvm_f, "entry", ctx)
position!(builder, entry)
global assert_counter
assert_counter += 1
message = globalstring_ptr!(builder, String(msg), "assert_message_$(assert_counter)")
file = globalstring_ptr!(builder, String(file), "assert_file_$(assert_counter)")
line = ConstantInt(T_int32, line)
func = globalstring_ptr!(builder, "unknown", "assert_function_$(assert_counter)")
charSize = ConstantInt(Csize_t(1), ctx)

# invoke __assertfail and return
# TODO: mark noreturn since we don't use ptxas?
assertfail_typ =
LLVM.FunctionType(T_void,
[T_pint8, T_pint8, T_int32, T_pint8, llvmtype(charSize)])
assertfail = LLVM.Function(mod, "__assertfail", assertfail_typ)
call!(builder, assertfail, [message, file, line, func, charSize])
ret!(builder)
end
# invoke __assertfail and return
# TODO: mark noreturn since we don't use ptxas?
assertfail_typ =
LLVM.FunctionType(T_void,
[T_pint8, T_pint8, T_int32, T_pint8, llvmtype(charSize)])
assertfail = LLVM.Function(mod, "__assertfail", assertfail_typ)
call!(builder, assertfail, [message, file, line, func, charSize])
ret!(builder)
end

call_function(llvm_f, Nothing, Tuple{})
call_function(llvm_f, Nothing, Tuple{})
end
end

62 changes: 33 additions & 29 deletions src/device/gcn/atomics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,26 +21,28 @@
# > that points to either the global address space or the shared address space.

@generated function llvm_atomic_op(::Val{binop}, ptr::DevicePtr{T,A}, val::T) where {binop, T, A}
T_val = convert(LLVMType, T)
T_ptr = convert(LLVMType, DevicePtr{T,A})
T_actual_ptr = LLVM.PointerType(T_val)
JuliaContext() do ctx
T_val = convert(LLVMType, T, ctx)
T_ptr = convert(LLVMType, DevicePtr{T,A}, ctx)
T_actual_ptr = LLVM.PointerType(T_val)

llvm_f, _ = create_function(T_val, [T_ptr, T_val])
llvm_f, _ = create_function(T_val, [T_ptr, T_val])

Builder(JuliaContext()) do builder
entry = BasicBlock(llvm_f, "entry", JuliaContext())
position!(builder, entry)
Builder(ctx) do builder
entry = BasicBlock(llvm_f, "entry", ctx)
position!(builder, entry)

actual_ptr = inttoptr!(builder, parameters(llvm_f)[1], T_actual_ptr)
actual_ptr = inttoptr!(builder, parameters(llvm_f)[1], T_actual_ptr)

rv = atomic_rmw!(builder, binop,
actual_ptr, parameters(llvm_f)[2],
atomic_acquire_release, #=single_threaded=# false)
rv = atomic_rmw!(builder, binop,
actual_ptr, parameters(llvm_f)[2],
atomic_acquire_release, #=single_threaded=# false)

ret!(builder, rv)
end
ret!(builder, rv)
end

call_function(llvm_f, T, Tuple{DevicePtr{T,A}, T}, :((ptr,val)))
call_function(llvm_f, T, Tuple{DevicePtr{T,A}, T}, :((ptr,val)))
end
end

const binops = Dict(
Expand Down Expand Up @@ -82,28 +84,30 @@ for T in (Int32, Int64, UInt32, UInt64)
end

@generated function llvm_atomic_cas(ptr::DevicePtr{T,A}, cmp::T, val::T) where {T, A}
T_val = convert(LLVMType, T)
T_ptr = convert(LLVMType, DevicePtr{T,A})
T_actual_ptr = LLVM.PointerType(T_val)
JuliaContext() do ctx
T_val = convert(LLVMType, T, ctx)
T_ptr = convert(LLVMType, DevicePtr{T,A}, ctx)
T_actual_ptr = LLVM.PointerType(T_val)

llvm_f, _ = create_function(T_val, [T_ptr, T_val, T_val])
llvm_f, _ = create_function(T_val, [T_ptr, T_val, T_val])

Builder(JuliaContext()) do builder
entry = BasicBlock(llvm_f, "entry", JuliaContext())
position!(builder, entry)
Builder(ctx) do builder
entry = BasicBlock(llvm_f, "entry", ctx)
position!(builder, entry)

actual_ptr = inttoptr!(builder, parameters(llvm_f)[1], T_actual_ptr)
actual_ptr = inttoptr!(builder, parameters(llvm_f)[1], T_actual_ptr)

res = atomic_cmpxchg!(builder, actual_ptr, parameters(llvm_f)[2],
parameters(llvm_f)[3], atomic_acquire_release, atomic_acquire,
#=single threaded=# false)
res = atomic_cmpxchg!(builder, actual_ptr, parameters(llvm_f)[2],
parameters(llvm_f)[3], atomic_acquire_release, atomic_acquire,
#=single threaded=# false)

rv = extract_value!(builder, res, 0)
rv = extract_value!(builder, res, 0)

ret!(builder, rv)
end
ret!(builder, rv)
end

call_function(llvm_f, T, Tuple{DevicePtr{T,A}, T, T}, :((ptr,cmp,val)))
call_function(llvm_f, T, Tuple{DevicePtr{T,A}, T, T}, :((ptr,cmp,val)))
end
end

for T in (Int32, Int64, UInt32, UInt64)
Expand Down
56 changes: 29 additions & 27 deletions src/device/gcn/execution_control.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,34 +3,36 @@
const completion_signal_base = _packet_offsets[findfirst(x->x==:completion_signal,_packet_names)]

@generated function _completion_signal()
T_int8 = LLVM.Int8Type(JuliaContext())
T_int64 = LLVM.Int64Type(JuliaContext())
_as = convert(Int, AS.Constant)
T_ptr_i8 = LLVM.PointerType(T_int8, _as)
T_ptr_i64 = LLVM.PointerType(T_int64, _as)

# create function
llvm_f, _ = create_function(T_int64)
mod = LLVM.parent(llvm_f)

# generate IR
Builder(JuliaContext()) do builder
entry = BasicBlock(llvm_f, "entry", JuliaContext())
position!(builder, entry)

# get the kernel dispatch pointer
intr_typ = LLVM.FunctionType(T_ptr_i8)
intr = LLVM.Function(mod, "llvm.amdgcn.dispatch.ptr", intr_typ)
ptr = call!(builder, intr)

# load the index
signal_ptr_i8 = inbounds_gep!(builder, ptr, [ConstantInt(completion_signal_base, JuliaContext())])
signal_ptr = bitcast!(builder, signal_ptr_i8, T_ptr_i64)
signal = load!(builder, signal_ptr)
ret!(builder, signal)
JuliaContext() do ctx
T_int8 = LLVM.Int8Type(ctx)
T_int64 = LLVM.Int64Type(ctx)
_as = convert(Int, AS.Constant)
T_ptr_i8 = LLVM.PointerType(T_int8, _as)
T_ptr_i64 = LLVM.PointerType(T_int64, _as)

# create function
llvm_f, _ = create_function(T_int64)
mod = LLVM.parent(llvm_f)

# generate IR
Builder(ctx) do builder
entry = BasicBlock(llvm_f, "entry", ctx)
position!(builder, entry)

# get the kernel dispatch pointer
intr_typ = LLVM.FunctionType(T_ptr_i8)
intr = LLVM.Function(mod, "llvm.amdgcn.dispatch.ptr", intr_typ)
ptr = call!(builder, intr)

# load the index
signal_ptr_i8 = inbounds_gep!(builder, ptr, [ConstantInt(completion_signal_base, ctx)])
signal_ptr = bitcast!(builder, signal_ptr_i8, T_ptr_i64)
signal = load!(builder, signal_ptr)
ret!(builder, signal)
end

call_function(llvm_f, UInt64)
end

call_function(llvm_f, UInt64)
end

signal_completion(value::Int64) = device_signal_store!(_completion_signal(), value)
Expand Down
98 changes: 51 additions & 47 deletions src/device/gcn/hostcall.jl
Original file line number Diff line number Diff line change
Expand Up @@ -45,66 +45,70 @@ end
## device signal functions
# TODO: device_signal_load, device_signal_add!, etc.
@inline @generated function device_signal_store!(signal::UInt64, value::Int64)
T_nothing = convert(LLVMType, Nothing)
T_i32 = LLVM.Int32Type(JuliaContext())
T_i64 = LLVM.Int64Type(JuliaContext())
JuliaContext() do ctx
T_nothing = convert(LLVMType, Nothing, ctx)
T_i32 = LLVM.Int32Type(ctx)
T_i64 = LLVM.Int64Type(ctx)

# create a function
llvm_f, _ = create_function(T_nothing, [T_i64, T_i64])
mod = LLVM.parent(llvm_f)
# create a function
llvm_f, _ = create_function(T_nothing, [T_i64, T_i64])
mod = LLVM.parent(llvm_f)

# generate IR
Builder(JuliaContext()) do builder
entry = BasicBlock(llvm_f, "entry", JuliaContext())
position!(builder, entry)
# generate IR
Builder(ctx) do builder
entry = BasicBlock(llvm_f, "entry", ctx)
position!(builder, entry)

T_signal_store = LLVM.FunctionType(T_nothing, [T_i64, T_i64, T_i32])
signal_store = LLVM.Function(mod, "__ockl_hsa_signal_store", T_signal_store)
call!(builder, signal_store, [parameters(llvm_f)[1],
parameters(llvm_f)[2],
# __ATOMIC_RELEASE == 3
ConstantInt(Int32(3), JuliaContext())])
T_signal_store = LLVM.FunctionType(T_nothing, [T_i64, T_i64, T_i32])
signal_store = LLVM.Function(mod, "__ockl_hsa_signal_store", T_signal_store)
call!(builder, signal_store, [parameters(llvm_f)[1],
parameters(llvm_f)[2],
# __ATOMIC_RELEASE == 3
ConstantInt(Int32(3), ctx)])

ret!(builder)
end
ret!(builder)
end

call_function(llvm_f, Nothing, Tuple{UInt64,Int64}, :((signal,value)))
call_function(llvm_f, Nothing, Tuple{UInt64,Int64}, :((signal,value)))
end
end
@inline @generated function device_signal_wait(signal::UInt64, value::Int64)
T_nothing = convert(LLVMType, Nothing)
T_i32 = LLVM.Int32Type(JuliaContext())
T_i64 = LLVM.Int64Type(JuliaContext())
JuliaContext() do ctx
T_nothing = convert(LLVMType, Nothing, ctx)
T_i32 = LLVM.Int32Type(ctx)
T_i64 = LLVM.Int64Type(ctx)

# create a function
llvm_f, _ = create_function(T_nothing, [T_i64, T_i64])
mod = LLVM.parent(llvm_f)
# create a function
llvm_f, _ = create_function(T_nothing, [T_i64, T_i64])
mod = LLVM.parent(llvm_f)

# generate IR
Builder(JuliaContext()) do builder
entry = BasicBlock(llvm_f, "entry", JuliaContext())
signal_match = BasicBlock(llvm_f, "signal_match", JuliaContext())
signal_miss = BasicBlock(llvm_f, "signal_miss", JuliaContext())
# generate IR
Builder(ctx) do builder
entry = BasicBlock(llvm_f, "entry", ctx)
signal_match = BasicBlock(llvm_f, "signal_match", ctx)
signal_miss = BasicBlock(llvm_f, "signal_miss", ctx)

position!(builder, entry)
br!(builder, signal_miss)
position!(builder, entry)
br!(builder, signal_miss)

position!(builder, signal_miss)
T_sleep = LLVM.FunctionType(T_nothing, [T_i32])
sleep_f = LLVM.Function(mod, "llvm.amdgcn.s.sleep", T_sleep)
call!(builder, sleep_f, [ConstantInt(Int32(1), JuliaContext())])
T_signal_load = LLVM.FunctionType(T_i64, [T_i64, T_i32])
signal_load = LLVM.Function(mod, "__ockl_hsa_signal_load", T_signal_load)
loaded_value = call!(builder, signal_load, [parameters(llvm_f)[1],
# __ATOMIC_ACQUIRE == 2
ConstantInt(Int32(2), JuliaContext())])
cond = icmp!(builder, LLVM.API.LLVMIntEQ, loaded_value, parameters(llvm_f)[2])
br!(builder, cond, signal_match, signal_miss)
position!(builder, signal_miss)
T_sleep = LLVM.FunctionType(T_nothing, [T_i32])
sleep_f = LLVM.Function(mod, "llvm.amdgcn.s.sleep", T_sleep)
call!(builder, sleep_f, [ConstantInt(Int32(1), ctx)])
T_signal_load = LLVM.FunctionType(T_i64, [T_i64, T_i32])
signal_load = LLVM.Function(mod, "__ockl_hsa_signal_load", T_signal_load)
loaded_value = call!(builder, signal_load, [parameters(llvm_f)[1],
# __ATOMIC_ACQUIRE == 2
ConstantInt(Int32(2), ctx)])
cond = icmp!(builder, LLVM.API.LLVMIntEQ, loaded_value, parameters(llvm_f)[2])
br!(builder, cond, signal_match, signal_miss)

position!(builder, signal_match)
ret!(builder)
end
position!(builder, signal_match)
ret!(builder)
end

call_function(llvm_f, Nothing, Tuple{UInt64,Int64}, :((signal,value)))
call_function(llvm_f, Nothing, Tuple{UInt64,Int64}, :((signal,value)))
end
end
"Calls the host function stored in `hc` with arguments `args`."
@inline @generated function hostcall!(hc::HostCall{UInt64,RT,AT}, args...) where {RT,AT}
Expand Down
Loading

2 comments on commit e6b4d61

@jpsamaroo
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register()

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/19937

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the GitHub interface, or via:

git tag -a v0.1.2 -m "<description of version>" e6b4d61bca645756e4d86f317786a79f25980fd8
git push origin v0.1.2

Please sign in to comment.