diff --git a/src/driver.jl b/src/driver.jl
index cca15bbc..0c4cc4c3 100644
--- a/src/driver.jl
+++ b/src/driver.jl
@@ -361,9 +361,11 @@ const __llvm_initialized = Ref(false)
         # we can only clean-up now, as optimization may lower or introduce calls to
         # functions from the GPU runtime (e.g. julia.gc_alloc_obj -> gpu_gc_pool_alloc)
         @dispose pm=ModulePassManager() begin
+            add!(pm, ModulePass("ExternalizeJuliaGlobals", externalize_julia_globals!))
             # eliminate all unused internal functions
             global_optimizer!(pm)
             global_dce!(pm)
+            add!(pm, ModulePass("InternalizeJuliaGlobals", internalize_julia_globals!))
             strip_dead_prototypes!(pm)
 
             # merge constants (such as exception messages)
@@ -428,3 +430,64 @@ end
 
     return code, ()
 end
+
+
+# Name test shared by the two passes below.
+# NOTE(review): `jl_global*` / `jl_sym*` look like the slots Julia's codegen emits for
+# object and symbol references -- confirm against the Julia versions we support.
+is_julia_global_name(name) = startswith(name, "jl_global") || startswith(name, "jl_sym")
+
+"""
+    externalize_julia_globals!(mod::LLVM.Module) -> Bool
+
+Module pass that rewrites every global in `mod` which has internal linkage, a null-pointer
+initializer and a `jl_global`/`jl_sym` name prefix into an externally-initialized
+declaration with external linkage and no initializer.
+
+The cleanup pipelines schedule it ahead of the global optimizer and global DCE, and run
+`internalize_julia_globals!` afterwards to restore the original form.
+NOTE(review): presumably this keeps those passes from folding or dropping the placeholder
+globals -- confirm.
+
+Returns `true` if any global was changed.
+"""
+function externalize_julia_globals!(mod::LLVM.Module)
+    changed = false
+    for gbl in LLVM.globals(mod)
+        LLVM.linkage(gbl) == LLVM.API.LLVMInternalLinkage || continue
+        LLVM.initializer(gbl) isa LLVM.PointerNull || continue
+        is_julia_global_name(LLVM.name(gbl)) || continue
+
+        LLVM.linkage!(gbl, LLVM.API.LLVMExternalLinkage)
+        LLVM.initializer!(gbl, nothing)
+        LLVM.extinit!(gbl, true)
+        changed = true
+    end
+    return changed
+end
+
+"""
+    internalize_julia_globals!(mod::LLVM.Module) -> Bool
+
+Counterpart of `externalize_julia_globals!`: gives every `jl_global`/`jl_sym` global that
+has external linkage and no initializer a null initializer and internal linkage again.
+
+The match is made on linkage, initializer and name only, so a global that already had this
+shape before `externalize_julia_globals!` ran is internalized as well.
+
+Returns `true` if any global was changed.
+"""
+function internalize_julia_globals!(mod::LLVM.Module)
+    changed = false
+    for gbl in LLVM.globals(mod)
+        LLVM.linkage(gbl) == LLVM.API.LLVMExternalLinkage || continue
+        LLVM.initializer(gbl) === nothing || continue
+        is_julia_global_name(LLVM.name(gbl)) || continue
+
+        LLVM.extinit!(gbl, false)
+        LLVM.initializer!(gbl, null(eltype(llvmtype(gbl))))
+        LLVM.linkage!(gbl, LLVM.API.LLVMInternalLinkage)
+        changed = true
+    end
+    return changed
+end
diff --git a/src/interface.jl b/src/interface.jl
index 31e737b8..73f6ae7b 100644
--- a/src/interface.jl
+++ b/src/interface.jl
@@ -277,3 +277,7 @@ function llvm_debug_info(@nospecialize(job::CompilerJob))
         LLVM.API.LLVMDebugEmissionKindFullDebug
     end
 end
+
+# Selects the policy code `compile_method_instance` passes to `jl_create_native`:
+# `true` -> 2, `false` -> 1. Targets opt in by adding a more specific method.
+extern_policy(@nospecialize(job::CompilerJob)) = false
diff --git a/src/jlgen.jl b/src/jlgen.jl
index 6b8648d3..db08f578 100644
--- a/src/jlgen.jl
+++ b/src/jlgen.jl
@@ -334,7 +334,13 @@ function compile_method_instance(@nospecialize(job::CompilerJob),
     if ci_cache_lookup(cache, method_instance, job.source.world, typemax(Cint)) === nothing
         ci_cache_populate(interp, cache, mt, method_instance, job.source.world, typemax(Cint))
     end
-
+
+    # Policy code handed to `jl_create_native`: 2 when the job opts in through
+    # `extern_policy`, otherwise 1 (the value that used to be hard-coded here).
+    # NOTE(review): these look like Julia's `CompilationPolicy` values (1 = Extern,
+    # 2 = ImagingMode) -- confirm they are accepted on every supported Julia version.
+    policy = extern_policy(job) ? 2 : 1
+
     # create a callback to look-up function in our cache,
     # and keep track of the method instances we needed.
     method_instances = []
@@ -378,21 +384,21 @@ function compile_method_instance(@nospecialize(job::CompilerJob),
         ts_mod = ThreadSafeModule(mod; ctx)
         ccall(:jl_create_native, Ptr{Cvoid},
               (Vector{MethodInstance}, LLVM.API.LLVMOrcThreadSafeModuleRef, Ptr{Base.CodegenParams}, Cint),
-              [method_instance], ts_mod, Ref(params), #=extern policy=# 1)
+              [method_instance], ts_mod, Ref(params), #=extern policy=# policy)
     elseif VERSION >= v"1.9.0-DEV.115"
         ccall(:jl_create_native, Ptr{Cvoid},
               (Vector{MethodInstance}, LLVM.API.LLVMContextRef, Ptr{Base.CodegenParams}, Cint),
-              [method_instance], ctx, Ref(params), #=extern policy=# 1)
+              [method_instance], ctx, Ref(params), #=extern policy=# policy)
     elseif VERSION >= v"1.8.0-DEV.661"
         @assert ctx == JuliaContext()
         ccall(:jl_create_native, Ptr{Cvoid},
               (Vector{MethodInstance}, Ptr{Base.CodegenParams}, Cint),
-              [method_instance], Ref(params), #=extern policy=# 1)
+              [method_instance], Ref(params), #=extern policy=# policy)
     else
         @assert ctx == JuliaContext()
         ccall(:jl_create_native, Ptr{Cvoid},
               (Vector{MethodInstance}, Base.CodegenParams, Cint),
-              [method_instance], params, #=extern policy=# 1)
+              [method_instance], params, #=extern policy=# policy)
     end
     @assert native_code != C_NULL
     llvm_mod_ref = if VERSION >= v"1.9.0-DEV.516"
diff --git a/src/mcgen.jl b/src/mcgen.jl
index 81bec997..5b013b85 100644
--- a/src/mcgen.jl
+++ b/src/mcgen.jl
@@ -6,12 +6,13 @@ function prepare_execution!(@nospecialize(job::CompilerJob), mod::LLVM.Module)
@dispose pm=ModulePassManager() begin global current_job current_job = job - + add!(pm, ModulePass("ExternalizeJuliaGlobals", externalize_julia_globals!)) global_optimizer!(pm) add!(pm, ModulePass("ResolveCPUReferences", resolve_cpu_references!)) global_dce!(pm) + add!(pm, ModulePass("InternalizeJuliaGlobals", internalize_julia_globals!)) strip_dead_prototypes!(pm) run!(pm, mod) diff --git a/src/native.jl b/src/native.jl index 70ef37d1..f12e9c55 100644 --- a/src/native.jl +++ b/src/native.jl @@ -8,7 +8,9 @@ Base.@kwdef struct NativeCompilerTarget <: AbstractCompilerTarget cpu::String=(LLVM.version() < v"8") ? "" : unsafe_string(LLVM.API.LLVMGetHostCPUName()) features::String=(LLVM.version() < v"8") ? "" : unsafe_string(LLVM.API.LLVMGetHostCPUFeatures()) always_inline::Bool=false # will mark the job function as always inline - jlruntime::Bool=true # Use Julia runtime for throwing errors, instead of the GPUCompiler support + jlruntime::Bool=true + reloc::LLVM.API.LLVMRelocMode=LLVM.API.LLVMRelocDefault + extern::Bool=false # Use Julia runtime for throwing errors, instead of the GPUCompiler support end llvm_triple(::NativeCompilerTarget) = Sys.MACHINE @@ -17,8 +19,9 @@ function llvm_machine(target::NativeCompilerTarget) triple = llvm_triple(target) t = Target(triple=triple) - - tm = TargetMachine(t, triple, target.cpu, target.features) + optlevel = LLVM.API.LLVMCodeGenLevelDefault + reloc = target.reloc + tm = TargetMachine(t, triple, target.cpu, target.features, optlevel = optlevel, reloc = reloc) asm_verbosity!(tm, true) return tm @@ -32,6 +35,9 @@ function process_entry!(job::CompilerJob{NativeCompilerTarget}, mod::LLVM.Module invoke(process_entry!, Tuple{CompilerJob, LLVM.Module, LLVM.Function}, job, mod, entry) end +GPUCompiler.extern_policy(job::CompilerJob{NativeCompilerTarget,P} where P) = + job.target.extern + ## job runtime_slug(job::CompilerJob{NativeCompilerTarget}) = "native_$(job.target.cpu)-$(hash(job.target.features))$(job.target.jlruntime ? 
"-jlrt" : "")" diff --git a/test/native.jl b/test/native.jl index aced0f43..ea64358c 100644 --- a/test/native.jl +++ b/test/native.jl @@ -1,3 +1,4 @@ +using Distributed @testset "native" begin include("definitions/native.jl") @@ -415,6 +416,8 @@ end (c::Closure2)(b) = c.x+b @test call_delayed(Closure2(3), 5) == 8 + + end ############################################################################################ @@ -446,4 +449,91 @@ end ############################################################################################ +addprocs(1) + +@everywhere workers() begin + using GPUCompiler + using Libdl + include("definitions/native.jl") +end +@everywhere begin +function generate_shlib(f, tt, name=GPUCompiler.safe_name(repr(f))) + mktemp() do path, io + source = FunctionSpec(f, Base.to_tuple_type(tt), false, name) + target = NativeCompilerTarget(;reloc=LLVM.API.LLVMRelocPIC, extern=true) + params = TestCompilerParams() + job = CompilerJob(target, source, params) + obj, _ = GPUCompiler.codegen(:obj, job; strip=true, only_entry=false, validate=false) + write(io, obj) + flush(io) + # FIXME: Be more portable + run(`ld -dylib -o $path.$dlext $path -L /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib -lSystem`) + return "$path.$dlext", name + end +end +function load_fptr(path, name) + ptr = dlopen("$path", Libdl.RTLD_LOCAL) + fptr = dlsym(ptr, "julia_$name") + @assert fptr != C_NULL + atexit(()->rm("$path")) + fptr +end +generate_shlib_fptr(f, tt, name=GPUCompiler.safe_name(repr(f))) = + load_fptr(generate_shlib(f, tt, name)...) 
+end
+
+@static if VERSION >= v"1.7.0-DEV.600" && Sys.isunix()
+@testset "shared library emission" begin
+    @testset "primitive types" begin
+        f1(x) = x+1
+        @test ccall(generate_shlib_fptr(f1, (Int,)), Int, (Int,), 1) == 2
+        f2(x,y) = x+y
+        path, name = generate_shlib(f2, (Int,Int)) # built here, loaded on a worker below
+        @test fetch(@spawnat first(workers()) ccall(load_fptr(path, name), Int, (Int,Int), 1, 2)) == 3
+    end
+    @testset "runtime calls" begin
+        function f3()
+            # Something reasonably complicated
+            if isdir(homedir())
+                true
+            else
+                false
+            end
+        end
+        @test ccall(generate_shlib_fptr(f3, ()), Bool, ())
+    end
+    @testset "String/Symbol" begin
+        f4(str) = str*"!"
+        @test ccall(generate_shlib_fptr(f4, (String,)), String, (String,), "Hello") == "Hello!"
+
+        f5() = :asymbol
+        @test ccall(generate_shlib_fptr(f5, ()), Symbol, ()) == :asymbol
+
+        f6(x) = x == :asymbol ? true : false
+        @test ccall(generate_shlib_fptr(f6, (Symbol,)), Bool, (Symbol,), :asymbol)
+        @test !ccall(generate_shlib_fptr(f6, (Symbol,)), Bool, (Symbol,), :bsymbol)
+    end
+    @testset "closures" begin
+        y = [42.0]
+        function cf1(x)
+            x + y[1]
+        end
+        # the closure object itself is passed as the leading `Any` argument
+        @test ccall(generate_shlib_fptr(cf1, (Float64,)), Float64, (Any, Float64,), cf1, 1.0) == 43.0
+    end
+    @testset "mutation" begin
+        function cf2(A, sym)
+            if sym != :asymbol
+                A[] = true
+            else
+                A[] = false
+            end
+            return nothing
+        end
+        A = Ref(false)
+        fptr = generate_shlib_fptr(cf2, (Base.RefValue{Bool}, Symbol))
+        ccall(fptr, Nothing, (Any, Symbol), A, :asymbol); @test !A[]
+        ccall(fptr, Nothing, (Any, Symbol), A, :bsymbol); @test A[]
+    end
+end
 end