Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update to LLVM.jl 6. #1976

Merged
merged 1 commit into from
Jun 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 12 additions & 12 deletions Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,9 @@ version = "0.1.7"

[[Compat]]
deps = ["Dates", "LinearAlgebra", "UUIDs"]
git-tree-sha1 = "7a60c856b9fa189eb34f5f8a6f6b5529b7942957"
git-tree-sha1 = "4e88377ae7ebeaf29a047aa1ee40826e0b708a5d"
uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
version = "4.6.1"
version = "4.7.0"

[[CompilerSupportLibraries_jll]]
deps = ["Artifacts", "Libdl"]
Expand All @@ -99,9 +99,9 @@ version = "0.1.9"

[[GPUArrays]]
deps = ["Adapt", "GPUArraysCore", "LLVM", "LinearAlgebra", "Printf", "Random", "Reexport", "Serialization", "Statistics"]
git-tree-sha1 = "a3351bc577a6b49297248aadc23a4add1097c2ac"
git-tree-sha1 = "2e57b4a4f9cc15e85a24d603256fe08e527f48d1"
uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
version = "8.7.1"
version = "8.8.1"

[[GPUArraysCore]]
deps = ["Adapt"]
Expand All @@ -111,9 +111,9 @@ version = "0.1.5"

[[GPUCompiler]]
deps = ["ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "Scratch", "TimerOutputs", "UUIDs"]
git-tree-sha1 = "c47730aca2381f935a52fd732190e424c507230e"
git-tree-sha1 = "0c21d9b7ff70859bdb2ca7dab814e144676c31db"
uuid = "61eb1bfa-7361-4325-ad38-22787b887f55"
version = "0.21.0"
version = "0.21.1"

[[InteractiveUtils]]
deps = ["Markdown"]
Expand All @@ -138,15 +138,15 @@ version = "1.4.1"

[[KernelAbstractions]]
deps = ["Adapt", "Atomix", "InteractiveUtils", "LinearAlgebra", "MacroTools", "PrecompileTools", "SparseArrays", "StaticArrays", "UUIDs", "UnsafeAtomics", "UnsafeAtomicsLLVM"]
git-tree-sha1 = "47be64f040a7ece575c2b5f53ca6da7b548d69f4"
git-tree-sha1 = "b48617c5d764908b5fac493cd907cf33cc11eec1"
uuid = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
version = "0.9.4"
version = "0.9.6"

[[LLVM]]
deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Printf", "Unicode"]
git-tree-sha1 = "5007c1421563108110bbd57f63d8ad4565808818"
git-tree-sha1 = "7d5788011dd273788146d40eb5b1fbdc199d0296"
uuid = "929cbde3-209d-540e-8aea-75f648917ca0"
version = "5.2.0"
version = "6.0.1"

[[LLVMExtra_jll]]
deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "TOML"]
Expand Down Expand Up @@ -343,9 +343,9 @@ version = "0.2.1"

[[UnsafeAtomicsLLVM]]
deps = ["LLVM", "UnsafeAtomics"]
git-tree-sha1 = "ea37e6066bf194ab78f4e747f5245261f17a7175"
git-tree-sha1 = "323e3d0acf5e78a56dfae7bd8928c989b4f3083e"
uuid = "d80eeb9a-aca5-4d75-85e5-170c8b632249"
version = "0.1.2"
version = "0.1.3"

[[Zlib_jll]]
deps = ["Libdl"]
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ ExprTools = "0.1"
GPUArrays = "8.6"
GPUCompiler = "0.21"
KernelAbstractions = "0.9.2"
LLVM = "5"
LLVM = "6"
Preferences = "1"
Random123 = "1.2"
RandomNumbers = "1.5.3"
Expand Down
14 changes: 5 additions & 9 deletions src/compiler/compilation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@ function GPUCompiler.link_libraries!(@nospecialize(job::CUDACompilerJob), mod::L
return
end

ctx = LLVM.context(mod)
lib = parse(LLVM.Module, read(libdevice); ctx)
lib = parse(LLVM.Module, read(libdevice))

# override libdevice's triple and datalayout to avoid warnings
triple!(lib, triple(mod))
Expand All @@ -32,7 +31,7 @@ function GPUCompiler.link_libraries!(@nospecialize(job::CUDACompilerJob), mod::L

@dispose pm=ModulePassManager() begin
push!(metadata(mod)["nvvm-reflect-ftz"],
MDNode([ConstantInt(Int32(1); ctx)]; ctx))
MDNode([ConstantInt(Int32(1))]))
run!(pm, mod)
end

Expand Down Expand Up @@ -99,14 +98,11 @@ end

# compile to executable machine code
function compile(@nospecialize(job::CompilerJob))
# lower to PTX
# TODO: on 1.9, this actually creates a context. cache those.
JuliaContext() do ctx
compile(job, ctx)
asm, meta = JuliaContext() do ctx
GPUCompiler.compile(:asm, job)
end
end
function compile(@nospecialize(job::CompilerJob), ctx)
# lower to PTX
asm, meta = GPUCompiler.compile(:asm, job; ctx)

# remove extraneous debug info on lower debug levels
if Base.JLOptions().debug_level < 2
Expand Down
12 changes: 6 additions & 6 deletions src/device/intrinsics/assertion.jl
Original file line number Diff line number Diff line change
Expand Up @@ -40,17 +40,17 @@ assert_counter = 0
@generated function cuassert_fail(::Val{msg}, ::Val{file}, ::Val{line}) where
{msg, file, line}
@dispose ctx=Context() begin
T_void = LLVM.VoidType(ctx)
T_int32 = LLVM.Int32Type(ctx)
T_pint8 = LLVM.PointerType(LLVM.Int8Type(ctx))
T_void = LLVM.VoidType()
T_int32 = LLVM.Int32Type()
T_pint8 = LLVM.PointerType(LLVM.Int8Type())

# create function
llvm_f, _ = create_function(T_void)
mod = LLVM.parent(llvm_f)

# generate IR
@dispose builder=IRBuilder(ctx) begin
entry = BasicBlock(llvm_f, "entry"; ctx)
@dispose builder=IRBuilder() begin
entry = BasicBlock(llvm_f, "entry")
position!(builder, entry)

global assert_counter
Expand All @@ -60,7 +60,7 @@ assert_counter = 0
file = globalstring_ptr!(builder, String(file), "assert_file_$(assert_counter)")
line = ConstantInt(T_int32, line)
func = globalstring_ptr!(builder, "unknown", "assert_function_$(assert_counter)")
charSize = ConstantInt(Csize_t(1); ctx)
charSize = ConstantInt(Csize_t(1))

# invoke __assertfail and return
# NOTE: we don't mark noreturn since that control flow might confuse ptxas
Expand Down
16 changes: 8 additions & 8 deletions src/device/intrinsics/atomics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,15 @@ const atomic_acquire_release = LLVM.API.LLVMAtomicOrderingAcquireRelease
# > that points to either the global address space or the shared address space.
@generated function llvm_atomic_op(::Val{binop}, ptr::LLVMPtr{T,A}, val::T) where {binop, T, A}
@dispose ctx=Context() begin
T_val = convert(LLVMType, T; ctx)
T_ptr = convert(LLVMType, ptr; ctx)
T_val = convert(LLVMType, T)
T_ptr = convert(LLVMType, ptr)

T_typed_ptr = LLVM.PointerType(T_val, A)

llvm_f, _ = create_function(T_val, [T_ptr, T_val])

@dispose builder=IRBuilder(ctx) begin
entry = BasicBlock(llvm_f, "entry"; ctx)
@dispose builder=IRBuilder() begin
entry = BasicBlock(llvm_f, "entry")
position!(builder, entry)

typed_ptr = bitcast!(builder, parameters(llvm_f)[1], T_typed_ptr)
Expand Down Expand Up @@ -109,15 +109,15 @@ end

@generated function llvm_atomic_cas(ptr::LLVMPtr{T,A}, cmp::T, val::T) where {T, A}
@dispose ctx=Context() begin
T_val = convert(LLVMType, T; ctx)
T_ptr = convert(LLVMType, ptr,;ctx)
T_val = convert(LLVMType, T)
T_ptr = convert(LLVMType, ptr)

T_typed_ptr = LLVM.PointerType(T_val, A)

llvm_f, _ = create_function(T_val, [T_ptr, T_val, T_val])

@dispose builder=IRBuilder(ctx) begin
entry = BasicBlock(llvm_f, "entry"; ctx)
@dispose builder=IRBuilder() begin
entry = BasicBlock(llvm_f, "entry")
position!(builder, entry)

typed_ptr = bitcast!(builder, parameters(llvm_f)[1], T_typed_ptr)
Expand Down
11 changes: 5 additions & 6 deletions src/device/intrinsics/indexing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@ export

@generated function _index(::Val{name}, ::Val{range}) where {name, range}
@dispose ctx=Context() begin
T_int32 = LLVM.Int32Type(ctx)
T_int32 = LLVM.Int32Type()

# create function
llvm_f, _ = create_function(T_int32)
mod = LLVM.parent(llvm_f)

# generate IR
@dispose builder=IRBuilder(ctx) begin
entry = BasicBlock(llvm_f, "entry"; ctx)
@dispose builder=IRBuilder() begin
entry = BasicBlock(llvm_f, "entry")
position!(builder, entry)

# call the indexing intrinsic
Expand All @@ -22,9 +22,8 @@ export
idx = call!(builder, intr_typ, intr)

# attach range metadata
range_metadata = MDNode([ConstantInt(Int32(range.start); ctx),
ConstantInt(Int32(range.stop); ctx)];
ctx)
range_metadata = MDNode([ConstantInt(Int32(range.start)),
ConstantInt(Int32(range.stop))])
metadata(idx)[LLVM.MD_range] = range_metadata

ret!(builder, idx)
Expand Down
16 changes: 8 additions & 8 deletions src/device/intrinsics/memory_dynamic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ export malloc

@generated function malloc(sz::Csize_t)
@dispose ctx=Context() begin
T_pint8 = LLVM.PointerType(LLVM.Int8Type(ctx))
T_size = convert(LLVMType, Csize_t; ctx)
T_ptr = convert(LLVMType, Ptr{Cvoid}; ctx)
T_pint8 = LLVM.PointerType(LLVM.Int8Type())
T_size = convert(LLVMType, Csize_t)
T_ptr = convert(LLVMType, Ptr{Cvoid})

# create function
llvm_f, _ = create_function(T_ptr, [T_size])
Expand All @@ -20,16 +20,16 @@ export malloc
#let attrs = function_attributes(intr)
# AllocSizeNumElemsNotPresent = reinterpret(Cuint, Cint(-1))
# packed_allocsize = Int64(1) << 32 | AllocSizeNumElemsNotPresent
# push!(attrs, EnumAttribute("allocsize", packed_allocsize; ctx))
# push!(attrs, EnumAttribute("allocsize", packed_allocsize))
#end
#let attrs = return_attributes(intr)
# push!(attrs, EnumAttribute("noalias", 0; ctx))
# push!(attrs, EnumAttribute("nonnull", 0; ctx))
# push!(attrs, EnumAttribute("noalias", 0))
# push!(attrs, EnumAttribute("nonnull", 0))
#end

# generate IR
@dispose builder=IRBuilder(ctx) begin
entry = BasicBlock(llvm_f, "entry"; ctx)
@dispose builder=IRBuilder() begin
entry = BasicBlock(llvm_f, "entry")
position!(builder, entry)

ptr = call!(builder, intr_typ, intr, [parameters(llvm_f)[1]])
Expand Down
10 changes: 5 additions & 5 deletions src/device/intrinsics/memory_shared.jl
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,8 @@ dynamic_smem_size() =
# get a pointer to shared memory, with known (static) or zero length (dynamic shared memory)
@generated function emit_shmem(::Type{T}, ::Val{len}=Val(0)) where {T,len}
@dispose ctx=Context() begin
T_int8 = LLVM.Int8Type(ctx)
T_ptr = convert(LLVMType, LLVMPtr{T,AS.Shared}; ctx)
T_int8 = LLVM.Int8Type()
T_ptr = convert(LLVMType, LLVMPtr{T,AS.Shared})

# create a function
llvm_f, _ = create_function(T_ptr)
Expand Down Expand Up @@ -121,11 +121,11 @@ dynamic_smem_size() =
alignment!(gv, align)

# generate IR
@dispose builder=IRBuilder(ctx) begin
entry = BasicBlock(llvm_f, "entry"; ctx)
@dispose builder=IRBuilder() begin
entry = BasicBlock(llvm_f, "entry")
position!(builder, entry)

ptr = gep!(builder, gv_typ, gv, [ConstantInt(0; ctx), ConstantInt(0; ctx)])
ptr = gep!(builder, gv_typ, gv, [ConstantInt(0), ConstantInt(0)])

untyped_ptr = bitcast!(builder, ptr, T_ptr)

Expand Down
14 changes: 7 additions & 7 deletions src/device/intrinsics/output.jl
Original file line number Diff line number Diff line change
Expand Up @@ -40,18 +40,18 @@ end
arg_exprs = [:( argspec[$i] ) for i in 1:length(argspec)]
arg_types = [argspec...]

T_void = LLVM.VoidType(ctx)
T_int32 = LLVM.Int32Type(ctx)
T_pint8 = LLVM.PointerType(LLVM.Int8Type(ctx))
T_void = LLVM.VoidType()
T_int32 = LLVM.Int32Type()
T_pint8 = LLVM.PointerType(LLVM.Int8Type())

# create functions
param_types = LLVMType[convert(LLVMType, typ; ctx) for typ in arg_types]
param_types = LLVMType[convert(LLVMType, typ) for typ in arg_types]
llvm_f, llvm_ft = create_function(T_int32, param_types)
mod = LLVM.parent(llvm_f)

# generate IR
@dispose builder=IRBuilder(ctx) begin
entry = BasicBlock(llvm_f, "entry"; ctx)
@dispose builder=IRBuilder() begin
entry = BasicBlock(llvm_f, "entry")
position!(builder, entry)

str = globalstring_ptr!(builder, String(fmt))
Expand All @@ -60,7 +60,7 @@ end
if isempty(argspec)
buffer = LLVM.PointerNull(T_pint8)
else
argtypes = LLVM.StructType("printf_args"; ctx)
argtypes = LLVM.StructType("printf_args")
elements!(argtypes, param_types)

args = alloca!(builder, argtypes)
Expand Down
12 changes: 6 additions & 6 deletions src/device/random.jl
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,8 @@ end
# a hacky method of exposing constant tables as constant GPU memory
function emit_constant_array(name::Symbol, data::AbstractArray{T}) where {T}
@dispose ctx=Context() begin
T_val = convert(LLVMType, T; ctx)
T_ptr = convert(LLVMType, LLVMPtr{T,AS.Constant}; ctx)
T_val = convert(LLVMType, T)
T_ptr = convert(LLVMType, LLVMPtr{T,AS.Constant})

# define function and get LLVM module
llvm_f, _ = create_function(T_ptr)
Expand All @@ -170,14 +170,14 @@ function emit_constant_array(name::Symbol, data::AbstractArray{T}) where {T}
gv = GlobalVariable(mod, T_global, "gpu_$(name)_data", AS.Constant)
alignment!(gv, 16)
linkage!(gv, LLVM.API.LLVMInternalLinkage)
initializer!(gv, ConstantArray(data; ctx))
initializer!(gv, ConstantArray(data))

# generate IR
@dispose builder=IRBuilder(ctx) begin
entry = BasicBlock(llvm_f, "entry"; ctx)
@dispose builder=IRBuilder() begin
entry = BasicBlock(llvm_f, "entry")
position!(builder, entry)

ptr = gep!(builder, T_global, gv, [ConstantInt(0; ctx), ConstantInt(0; ctx)])
ptr = gep!(builder, T_global, gv, [ConstantInt(0), ConstantInt(0)])

untyped_ptr = bitcast!(builder, ptr, T_ptr)

Expand Down
2 changes: 1 addition & 1 deletion src/device/runtime.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ function precompile_runtime(caps=CUDA.llvm_compat(LLVM.version()).cap)
target = PTXCompilerTarget(; cap)
config = CompilerConfig(target, params)
job = CompilerJob(mi, config)
GPUCompiler.load_runtime(job; ctx)
GPUCompiler.load_runtime(job)
end
end
return
Expand Down