From cba9bd5cb07717daabcddd9850957b3a1cfe3e47 Mon Sep 17 00:00:00 2001 From: Justin Bogner Date: Wed, 8 Jan 2025 16:56:05 -0800 Subject: [PATCH] [DirectX] Implement the resource.load.rawbuffer intrinsic (#121012) This introduces `@llvm.dx.resource.load.rawbuffer` and generalizes the buffer load docs under DirectX/DXILResources. This resolves the "load" parts of #106188 --- llvm/docs/DirectX/DXILResources.rst | 157 +++++++++--- llvm/include/llvm/IR/IntrinsicsDirectX.td | 4 + llvm/lib/Target/DirectX/DXIL.td | 19 ++ llvm/lib/Target/DirectX/DXILOpBuilder.cpp | 4 + llvm/lib/Target/DirectX/DXILOpLowering.cpp | 45 ++++ llvm/test/CodeGen/DirectX/BufferLoad-sm61.ll | 60 +++++ .../CodeGen/DirectX/RawBufferLoad-error64.ll | 24 ++ llvm/test/CodeGen/DirectX/RawBufferLoad.ll | 232 ++++++++++++++++++ llvm/utils/TableGen/DXILEmitter.cpp | 4 +- 9 files changed, 519 insertions(+), 30 deletions(-) create mode 100644 llvm/test/CodeGen/DirectX/BufferLoad-sm61.ll create mode 100644 llvm/test/CodeGen/DirectX/RawBufferLoad-error64.ll create mode 100644 llvm/test/CodeGen/DirectX/RawBufferLoad.ll diff --git a/llvm/docs/DirectX/DXILResources.rst b/llvm/docs/DirectX/DXILResources.rst index fafcb247f49c94c..857d29e48363b05 100644 --- a/llvm/docs/DirectX/DXILResources.rst +++ b/llvm/docs/DirectX/DXILResources.rst @@ -318,39 +318,43 @@ Examples: %ptr = call ptr @llvm.dx.resource.getpointer.p0.tdx.TypedBuffer_v4f32_0_0_0t( target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 %index) -16-byte Loads, Samples, and Gathers ------------------------------------ - -*relevant types: TypedBuffer, CBuffer, and Textures* - -TypedBuffer, CBuffer, and Texture loads, as well as samples and gathers, can -return 1 to 4 elements from the given resource, to a maximum of 16 bytes of -data. DXIL's modeling of this is influenced by DirectX and DXBC's history and -it generally treats these operations as returning 4 32-bit values. 
For 16-bit -elements the values are 16-bit values, and for 64-bit values the operations -return 4 32-bit integers and emit further code to construct the double. - -In DXIL, these operations return `ResRet`_ and `CBufRet`_ values, are structs -containing 4 elements of the same type, and in the case of `ResRet` a 5th -element that is used by the `CheckAccessFullyMapped`_ operation. - -In LLVM IR the intrinsics will return the contained type of the resource -instead. That is, ``llvm.dx.resource.load.typedbuffer`` from a -``Buffer<float>`` would return a single float, from ``Buffer<float4>`` a vector -of 4 floats, and from ``Buffer<double2>`` a vector of two doubles, etc. The -operations are then expanded out to match DXIL's format during lowering. - -In order to support ``CheckAccessFullyMapped``, we need these intrinsics to -return an anonymous struct with element-0 being the contained type, and -element-1 being the ``i1`` result of a ``CheckAccessFullyMapped`` call. We -don't have a separate call to ``CheckAccessFullyMapped`` at all, since that's -the only operation that can possibly be done on this value. In practice this -may mean we insert a DXIL operation for the check when this was missing in the -HLSL source, but this actually matches DXC's behaviour in practice. +Loads, Samples, and Gathers +--------------------------- + +*relevant types: Buffers, CBuffers, and Textures* + +All load, sample, and gather operations in DXIL return a `ResRet`_ type, and +CBuffer loads return a similar `CBufRet`_ type. These types are structs +containing 4 elements of some basic type, and in the case of `ResRet` a 5th +element that is used by the `CheckAccessFullyMapped`_ operation. Some of these +operations, like `RawBufferLoad`_, include a mask and/or alignment that tell us +some information about how to interpret those four values.
+ +In the LLVM IR representations of these operations we instead return scalars or +vectors, but we keep the requirement that we only return up to 4 elements of a +basic type. This avoids some unnecessary casting and structure manipulation in +the intermediate format while also keeping lowering to DXIL straightforward. + +LLVM intrinsics that map to operations returning `ResRet` return an anonymous +struct with element-0 being the scalar or vector type, and element-1 being the +``i1`` result of a ``CheckAccessFullyMapped`` call. We don't have a separate +call to ``CheckAccessFullyMapped`` at all, since that's the only operation that +can possibly be done on this value. In practice this may mean we insert a DXIL +operation for the check when this was missing in the HLSL source, but this +actually matches DXC's behaviour. + +For TypedBuffer and Texture, we map directly from the contained type of the +resource to the return value of the intrinsic. Since these resources are +constrained to contain only scalars and vectors of up to 4 elements, the +lowering to DXIL ops is generally straightforward. The one exception we have +here is that ``double`` types in the elements are special - these are allowed in +the LLVM intrinsics, but are lowered to pairs of ``i32`` followed by +``MakeDouble`` operations for DXIL. .. _ResRet: https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/DXIL.rst#resource-operation-return-types .. _CBufRet: https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/DXIL.rst#cbufferloadlegacy .. _CheckAccessFullyMapped: https://learn.microsoft.com/en-us/windows/win32/direct3dhlsl/checkaccessfullymapped +.. _RawBufferLoad: https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/DXIL.rst#rawbufferload ..
list-table:: ``@llvm.dx.resource.load.typedbuffer`` :header-rows: 1 @@ -392,6 +396,101 @@ Examples: @llvm.dx.resource.load.typedbuffer.v2f64.tdx.TypedBuffer_v2f64_0_0t( target("dx.TypedBuffer", <2 x double>, 0, 0, 0) %buffer, i32 %index) +For RawBuffer, an HLSL load operation may return an arbitrarily sized result, +but we still constrain the LLVM intrinsic to return only up to 4 elements of a +basic type. This means that larger loads are represented as a series of loads, +which matches DXIL. Unlike in the `RawBufferLoad`_ operation, we do not need +arguments for the mask/type size and alignment, since we can calculate these +from the return type of the load during lowering. + +.. _RawBufferLoad: https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/DXIL.rst#rawbufferload + +.. list-table:: ``@llvm.dx.resource.load.rawbuffer`` + :header-rows: 1 + + * - Argument + - + - Type + - Description + * - Return value + - + - A structure of a scalar or vector and the check bit + - The data loaded from the buffer and the check bit + * - ``%buffer`` + - 0 + - ``target(dx.RawBuffer, ...)`` + - The buffer to load from + * - ``%index`` + - 1 + - ``i32`` + - Index into the buffer + * - ``%offset`` + - 2 + - ``i32`` + - Offset into the structure at the given index + +Examples: + +.. 
code-block:: llvm + + ; float + %ret = call {float, i1} + @llvm.dx.resource.load.rawbuffer.f32.tdx.RawBuffer_f32_0_0_0t( + target("dx.RawBuffer", float, 0, 0, 0) %buffer, + i32 %index, + i32 0) + %ret = call {float, i1} + @llvm.dx.resource.load.rawbuffer.f32.tdx.RawBuffer_i8_0_0_0t( + target("dx.RawBuffer", i8, 0, 0, 0) %buffer, + i32 %byte_offset, + i32 0) + + ; float4 + %ret = call {<4 x float>, i1} + @llvm.dx.resource.load.rawbuffer.v4f32.tdx.RawBuffer_v4f32_0_0_0t( + target("dx.RawBuffer", <4 x float>, 0, 0, 0) %buffer, + i32 %index, + i32 0) + %ret = call {<4 x float>, i1} + @llvm.dx.resource.load.rawbuffer.v4f32.tdx.RawBuffer_i8_0_0_0t( + target("dx.RawBuffer", i8, 0, 0, 0) %buffer, + i32 %byte_offset, + i32 0) + + ; struct S0 { float4 f; int4 i; }; + %ret = call {<4 x float>, i1} + @llvm.dx.resource.load.rawbuffer.v4f32.tdx.RawBuffer_sl_v4f32v4i32s_0_0_0t( + target("dx.RawBuffer", {<4 x float>, <4 x i32>}, 0, 0, 0) %buffer, + i32 %index, + i32 0) + %ret = call {<4 x i32>, i1} + @llvm.dx.resource.load.rawbuffer.v4i32.tdx.RawBuffer_sl_v4f32v4i32s_0_0_0t( + target("dx.RawBuffer", {<4 x float>, <4 x i32>}, 0, 0, 0) %buffer, + i32 %index, + i32 1) + + ; struct Q { float4 f; int3 i; } + ; struct R { int z; Q x; } + %ret = call {i32, i1} + @llvm.dx.resource.load.rawbuffer.i32( + target("dx.RawBuffer", {i32, {<4 x float>, <3 x i32>}}, 0, 0, 0) + %buffer, i32 %index, i32 0) + %ret = call {<4 x float>, i1} + @llvm.dx.resource.load.rawbuffer.v4f32( + target("dx.RawBuffer", {i32, {<4 x float>, <3 x i32>}}, 0, 0, 0) + %buffer, i32 %index, i32 4) + %ret = call {<3 x i32>, i1} + @llvm.dx.resource.load.rawbuffer.v3i32( + target("dx.RawBuffer", {i32, {<4 x float>, <3 x i32>}}, 0, 0, 0) + %buffer, i32 %index, i32 20) + + ; byteaddressbuf.Load<int64_t4> + %ret = call {<4 x i64>, i1} + @llvm.dx.resource.load.rawbuffer.v4i64.tdx.RawBuffer_i8_0_0_0t( + target("dx.RawBuffer", i8, 0, 0, 0) %buffer, + i32 %byte_offset, + i32 0) + Texture and Typed Buffer Stores ------------------------------- diff --git
a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index dfdfda963b62738..3b1d1a88e01a8b2 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -36,6 +36,10 @@ def int_dx_resource_load_typedbuffer def int_dx_resource_store_typedbuffer : DefaultAttrsIntrinsic<[], [llvm_any_ty, llvm_i32_ty, llvm_anyvector_ty], [IntrWriteMem]>; +def int_dx_resource_load_rawbuffer + : DefaultAttrsIntrinsic<[llvm_any_ty, llvm_i1_ty], + [llvm_any_ty, llvm_i32_ty, llvm_i32_ty], + [IntrReadMem]>; def int_dx_resource_updatecounter : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_i8_ty], diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index 5d865a3c0bbbb7a..62b5b704e99eb2f 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ b/llvm/lib/Target/DirectX/DXIL.td @@ -42,8 +42,10 @@ def FloatTy : DXILOpParamType; def DoubleTy : DXILOpParamType; def ResRetHalfTy : DXILOpParamType; def ResRetFloatTy : DXILOpParamType; +def ResRetDoubleTy : DXILOpParamType; def ResRetInt16Ty : DXILOpParamType; def ResRetInt32Ty : DXILOpParamType; +def ResRetInt64Ty : DXILOpParamType; def HandleTy : DXILOpParamType; def ResBindTy : DXILOpParamType; def ResPropsTy : DXILOpParamType; @@ -890,6 +892,23 @@ def SplitDouble : DXILOp<102, splitDouble> { let attributes = [Attributes]; } +def RawBufferLoad : DXILOp<139, rawBufferLoad> { + let Doc = "reads from a raw buffer and structured buffer"; + // Handle, Coord0, Coord1, Mask, Alignment + let arguments = [HandleTy, Int32Ty, Int32Ty, Int8Ty, Int32Ty]; + let result = OverloadTy; + let overloads = [ + Overloads, + Overloads + ]; + let stages = [Stages]; +} + def Dot4AddI8Packed : DXILOp<163, dot4AddPacked> { let Doc = "signed dot product of 4 x i8 vectors packed into i32, with " "accumulate to i32"; diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp index 5d5bb3eacace258..9f88ccd7a7b7dde 100644 --- 
a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp +++ b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp @@ -263,10 +263,14 @@ static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx, return getResRetType(Type::getHalfTy(Ctx)); case OpParamType::ResRetFloatTy: return getResRetType(Type::getFloatTy(Ctx)); + case OpParamType::ResRetDoubleTy: + return getResRetType(Type::getDoubleTy(Ctx)); case OpParamType::ResRetInt16Ty: return getResRetType(Type::getInt16Ty(Ctx)); case OpParamType::ResRetInt32Ty: return getResRetType(Type::getInt32Ty(Ctx)); + case OpParamType::ResRetInt64Ty: + return getResRetType(Type::getInt64Ty(Ctx)); case OpParamType::HandleTy: return getHandleType(Ctx); case OpParamType::ResBindTy: diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp index e0ee4d6d6b13057..f43815bf211664a 100644 --- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp +++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp @@ -542,6 +542,48 @@ class OpLowerer { }); } + [[nodiscard]] bool lowerRawBufferLoad(Function &F) { + Triple TT(Triple(M.getTargetTriple())); + VersionTuple DXILVersion = TT.getDXILVersion(); + const DataLayout &DL = F.getDataLayout(); + IRBuilder<> &IRB = OpBuilder.getIRB(); + Type *Int8Ty = IRB.getInt8Ty(); + Type *Int32Ty = IRB.getInt32Ty(); + + return replaceFunction(F, [&](CallInst *CI) -> Error { + IRB.SetInsertPoint(CI); + + Type *OldTy = cast(CI->getType())->getElementType(0); + Type *ScalarTy = OldTy->getScalarType(); + Type *NewRetTy = OpBuilder.getResRetType(ScalarTy); + + Value *Handle = + createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType()); + Value *Index0 = CI->getArgOperand(1); + Value *Index1 = CI->getArgOperand(2); + uint64_t NumElements = + DL.getTypeSizeInBits(OldTy) / DL.getTypeSizeInBits(ScalarTy); + Value *Mask = ConstantInt::get(Int8Ty, ~(~0U << NumElements)); + Value *Align = + ConstantInt::get(Int32Ty, DL.getPrefTypeAlign(ScalarTy).value()); + + Expected OpCall = + DXILVersion >= 
VersionTuple(1, 2) + ? OpBuilder.tryCreateOp(OpCode::RawBufferLoad, + {Handle, Index0, Index1, Mask, Align}, + CI->getName(), NewRetTy) + : OpBuilder.tryCreateOp(OpCode::BufferLoad, + {Handle, Index0, Index1}, CI->getName(), + NewRetTy); + if (Error E = OpCall.takeError()) + return E; + if (Error E = replaceResRetUses(CI, *OpCall, /*HasCheckBit=*/true)) + return E; + + return Error::success(); + }); + } + [[nodiscard]] bool lowerUpdateCounter(Function &F) { IRBuilder<> &IRB = OpBuilder.getIRB(); Type *Int32Ty = IRB.getInt32Ty(); @@ -736,6 +778,9 @@ class OpLowerer { case Intrinsic::dx_resource_store_typedbuffer: HasErrors |= lowerTypedBufferStore(F); break; + case Intrinsic::dx_resource_load_rawbuffer: + HasErrors |= lowerRawBufferLoad(F); + break; case Intrinsic::dx_resource_updatecounter: HasErrors |= lowerUpdateCounter(F); break; diff --git a/llvm/test/CodeGen/DirectX/BufferLoad-sm61.ll b/llvm/test/CodeGen/DirectX/BufferLoad-sm61.ll new file mode 100644 index 000000000000000..501f15192d272b3 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/BufferLoad-sm61.ll @@ -0,0 +1,60 @@ +; RUN: opt -S -dxil-op-lower %s | FileCheck %s +; Before SM6.2 ByteAddressBuffer and StructuredBuffer lower to bufferLoad. 
+ +target triple = "dxil-pc-shadermodel6.1-compute" + +; CHECK-LABEL: define void @loadf32_struct +define void @loadf32_struct(i32 %index) { + %buffer = call target("dx.RawBuffer", float, 0, 0, 0) + @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_f32_0_0_0( + i32 0, i32 0, i32 1, i32 0, i1 false) + + ; CHECK: [[DATA:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle %{{.*}}, i32 %index, i32 0) + %load = call {float, i1} + @llvm.dx.resource.load.rawbuffer.f32.tdx.RawBuffer_f32_0_0_0t( + target("dx.RawBuffer", float, 0, 0, 0) %buffer, + i32 %index, + i32 0) + + ret void +} + +; CHECK-LABEL: define void @loadv4f32_byte +define void @loadv4f32_byte(i32 %offset) { + %buffer = call target("dx.RawBuffer", i8, 0, 0, 0) + @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_0_0_0( + i32 0, i32 0, i32 1, i32 0, i1 false) + + ; CHECK: [[DATA:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle %{{.*}}, i32 %offset, i32 0) + %load = call {<4 x float>, i1} + @llvm.dx.resource.load.rawbuffer.f32.tdx.RawBuffer_i8_0_0_0t( + target("dx.RawBuffer", i8, 0, 0, 0) %buffer, + i32 %offset, + i32 0) + + ret void +} + +; CHECK-LABEL: define void @loadnested +define void @loadnested(i32 %index) { + %buffer = call + target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 0, 0, 0) + @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, i1 false) + + ; CHECK: [[DATAI32:%.*]] = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle %{{.*}}, i32 %index, i32 0) + %loadi32 = call {i32, i1} @llvm.dx.resource.load.rawbuffer.i32( + target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 0, 0, 0) %buffer, + i32 %index, i32 0) + + ; CHECK: [[DATAF32:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle %{{.*}}, i32 %index, i32 4) + %loadf32 = call {<4 x float>, i1} @llvm.dx.resource.load.rawbuffer.v4f32( + target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 0, 0, 0) %buffer, + i32 
%index, i32 4) + + ; CHECK: [[DATAF16:%.*]] = call %dx.types.ResRet.f16 @dx.op.bufferLoad.f16(i32 68, %dx.types.Handle %{{.*}}, i32 %index, i32 20) + %loadf16 = call {<3 x half>, i1} @llvm.dx.resource.load.rawbuffer.v3f16( + target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 0, 0, 0) %buffer, + i32 %index, i32 20) + + ret void +} diff --git a/llvm/test/CodeGen/DirectX/RawBufferLoad-error64.ll b/llvm/test/CodeGen/DirectX/RawBufferLoad-error64.ll new file mode 100644 index 000000000000000..b8a6649baf689f6 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/RawBufferLoad-error64.ll @@ -0,0 +1,24 @@ +; We use llc for this test so that we don't abort after the first error. +; RUN: not llc %s -o /dev/null 2>&1 | FileCheck %s + +target triple = "dxil-pc-shadermodel6.2-compute" + +declare void @v4f64_user(<4 x double>) + +; Can't load 64 bit types directly until SM6.3 (byteaddressbuf.Load) +; CHECK: error: +; CHECK-SAME: in function loadv4f64_byte +; CHECK-SAME: Cannot create RawBufferLoad operation: Invalid overload type +define void @loadv4f64_byte(i32 %offset) "hlsl.export" { + %buffer = call target("dx.RawBuffer", i8, 0, 0, 0) + @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_0_0_0( + i32 0, i32 0, i32 1, i32 0, i1 false) + + %load = call {<4 x double>, i1} @llvm.dx.resource.load.rawbuffer.v4i64( + target("dx.RawBuffer", i8, 0, 0, 0) %buffer, i32 %offset, i32 0) + %data = extractvalue {<4 x double>, i1} %load, 0 + + call void @v4f64_user(<4 x double> %data) + + ret void +} diff --git a/llvm/test/CodeGen/DirectX/RawBufferLoad.ll b/llvm/test/CodeGen/DirectX/RawBufferLoad.ll new file mode 100644 index 000000000000000..586b9c44e95d240 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/RawBufferLoad.ll @@ -0,0 +1,232 @@ +; RUN: opt -S -dxil-op-lower %s | FileCheck %s + +target triple = "dxil-pc-shadermodel6.6-compute" + +declare void @f32_user(float) +declare void @v4f32_user(<4 x float>) +declare void @i32_user(i32) +declare void @v4i32_user(<4 x i32>) +declare void 
@v3f16_user(<3 x half>) +declare void @v4f64_user(<4 x double>) + +; CHECK-LABEL: define void @loadf32_struct +define void @loadf32_struct(i32 %index) { + %buffer = call target("dx.RawBuffer", float, 0, 0, 0) + @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_f32_0_0_0( + i32 0, i32 0, i32 1, i32 0, i1 false) + + ; CHECK: [[DATA:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %index, i32 0, i8 1, i32 4) + %load = call {float, i1} + @llvm.dx.resource.load.rawbuffer.f32.tdx.RawBuffer_f32_0_0_0t( + target("dx.RawBuffer", float, 0, 0, 0) %buffer, + i32 %index, + i32 0) + %data = extractvalue {float, i1} %load, 0 + + ; CHECK: [[VAL:%.*]] = extractvalue %dx.types.ResRet.f32 [[DATA]], 0 + ; CHECK: call void @f32_user(float [[VAL]]) + call void @f32_user(float %data) + + ret void +} + +; CHECK-LABEL: define void @loadf32_byte +define void @loadf32_byte(i32 %offset) { + %buffer = call target("dx.RawBuffer", i8, 0, 0, 0) + @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_0_0_0( + i32 0, i32 0, i32 1, i32 0, i1 false) + + ; CHECK: [[DATA:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %offset, i32 0, i8 1, i32 4) + %load = call {float, i1} + @llvm.dx.resource.load.rawbuffer.f32.tdx.RawBuffer_i8_0_0_0t( + target("dx.RawBuffer", i8, 0, 0, 0) %buffer, + i32 %offset, + i32 0) + %data = extractvalue {float, i1} %load, 0 + + ; CHECK: [[VAL:%.*]] = extractvalue %dx.types.ResRet.f32 [[DATA]], 0 + ; CHECK: call void @f32_user(float [[VAL]]) + call void @f32_user(float %data) + + ret void +} + +; CHECK-LABEL: define void @loadv4f32_struct +define void @loadv4f32_struct(i32 %index) { + %buffer = call target("dx.RawBuffer", <4 x float>, 0, 0, 0) + @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_v4f32_0_0_0( + i32 0, i32 0, i32 1, i32 0, i1 false) + + ; CHECK: [[DATA:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %index, i32 0, i8 
15, i32 4) + %load = call {<4 x float>, i1} + @llvm.dx.resource.load.rawbuffer.f32.tdx.RawBuffer_v4f32_0_0_0t( + target("dx.RawBuffer", <4 x float>, 0, 0, 0) %buffer, + i32 %index, + i32 0) + %data = extractvalue {<4 x float>, i1} %load, 0 + + ; CHECK: extractvalue %dx.types.ResRet.f32 [[DATA]], 0 + ; CHECK: extractvalue %dx.types.ResRet.f32 [[DATA]], 1 + ; CHECK: extractvalue %dx.types.ResRet.f32 [[DATA]], 2 + ; CHECK: extractvalue %dx.types.ResRet.f32 [[DATA]], 3 + ; CHECK: insertelement <4 x float> undef + ; CHECK: insertelement <4 x float> + ; CHECK: insertelement <4 x float> + ; CHECK: insertelement <4 x float> + ; CHECK: call void @v4f32_user(<4 x float> + call void @v4f32_user(<4 x float> %data) + + ret void +} + +; CHECK-LABEL: define void @loadv4f32_byte +define void @loadv4f32_byte(i32 %offset) { + %buffer = call target("dx.RawBuffer", i8, 0, 0, 0) + @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_0_0_0( + i32 0, i32 0, i32 1, i32 0, i1 false) + + ; CHECK: [[DATA:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %offset, i32 0, i8 15, i32 4) + %load = call {<4 x float>, i1} + @llvm.dx.resource.load.rawbuffer.f32.tdx.RawBuffer_i8_0_0_0t( + target("dx.RawBuffer", i8, 0, 0, 0) %buffer, + i32 %offset, + i32 0) + %data = extractvalue {<4 x float>, i1} %load, 0 + + ; CHECK: extractvalue %dx.types.ResRet.f32 [[DATA]], 0 + ; CHECK: extractvalue %dx.types.ResRet.f32 [[DATA]], 1 + ; CHECK: extractvalue %dx.types.ResRet.f32 [[DATA]], 2 + ; CHECK: extractvalue %dx.types.ResRet.f32 [[DATA]], 3 + ; CHECK: insertelement <4 x float> undef + ; CHECK: insertelement <4 x float> + ; CHECK: insertelement <4 x float> + ; CHECK: insertelement <4 x float> + ; CHECK: call void @v4f32_user(<4 x float> + call void @v4f32_user(<4 x float> %data) + + ret void +} + +; CHECK-LABEL: define void @loadelements +define void @loadelements(i32 %index) { + %buffer = call target("dx.RawBuffer", {<4 x float>, <4 x i32>}, 0, 0, 0) + 
@llvm.dx.resource.handlefrombinding.tdx.RawBuffer_sl_v4f32v4i32s_0_0_0( + i32 0, i32 0, i32 1, i32 0, i1 false) + + ; CHECK: [[DATAF32:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %index, i32 0, i8 15, i32 4) + %loadf32 = call {<4 x float>, i1} + @llvm.dx.resource.load.rawbuffer.v4f32( + target("dx.RawBuffer", {<4 x float>, <4 x i32>}, 0, 0, 0) %buffer, + i32 %index, + i32 0) + %dataf32 = extractvalue {<4 x float>, i1} %loadf32, 0 + + ; CHECK: extractvalue %dx.types.ResRet.f32 [[DATAF32]], 0 + ; CHECK: extractvalue %dx.types.ResRet.f32 [[DATAF32]], 1 + ; CHECK: extractvalue %dx.types.ResRet.f32 [[DATAF32]], 2 + ; CHECK: extractvalue %dx.types.ResRet.f32 [[DATAF32]], 3 + ; CHECK: insertelement <4 x float> undef + ; CHECK: insertelement <4 x float> + ; CHECK: insertelement <4 x float> + ; CHECK: insertelement <4 x float> + ; CHECK: call void @v4f32_user(<4 x float> + call void @v4f32_user(<4 x float> %dataf32) + + ; CHECK: [[DATAI32:%.*]] = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %index, i32 1, i8 15, i32 4) + %loadi32 = call {<4 x i32>, i1} + @llvm.dx.resource.load.rawbuffer.v4i32( + target("dx.RawBuffer", {<4 x float>, <4 x i32>}, 0, 0, 0) %buffer, + i32 %index, + i32 1) + %datai32 = extractvalue {<4 x i32>, i1} %loadi32, 0 + + ; CHECK: extractvalue %dx.types.ResRet.i32 [[DATAI32]], 0 + ; CHECK: extractvalue %dx.types.ResRet.i32 [[DATAI32]], 1 + ; CHECK: extractvalue %dx.types.ResRet.i32 [[DATAI32]], 2 + ; CHECK: extractvalue %dx.types.ResRet.i32 [[DATAI32]], 3 + ; CHECK: insertelement <4 x i32> undef + ; CHECK: insertelement <4 x i32> + ; CHECK: insertelement <4 x i32> + ; CHECK: insertelement <4 x i32> + ; CHECK: call void @v4i32_user(<4 x i32> + call void @v4i32_user(<4 x i32> %datai32) + + ret void +} + +; CHECK-LABEL: define void @loadnested +define void @loadnested(i32 %index) { + %buffer = call + target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 0, 
0, 0) + @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, i1 false) + + ; CHECK: [[DATAI32:%.*]] = call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32(i32 139, %dx.types.Handle %{{.*}}, i32 %index, i32 0, i8 1, i32 4) + %loadi32 = call {i32, i1} @llvm.dx.resource.load.rawbuffer.i32( + target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 0, 0, 0) %buffer, + i32 %index, i32 0) + %datai32 = extractvalue {i32, i1} %loadi32, 0 + + ; CHECK: [[VALI32:%.*]] = extractvalue %dx.types.ResRet.i32 [[DATAI32]], 0 + ; CHECK: call void @i32_user(i32 [[VALI32]]) + call void @i32_user(i32 %datai32) + + ; CHECK: [[DATAF32:%.*]] = call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 %index, i32 4, i8 15, i32 4) + %loadf32 = call {<4 x float>, i1} @llvm.dx.resource.load.rawbuffer.v4f32( + target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 0, 0, 0) %buffer, + i32 %index, i32 4) + %dataf32 = extractvalue {<4 x float>, i1} %loadf32, 0 + + ; CHECK: extractvalue %dx.types.ResRet.f32 [[DATAF32]], 0 + ; CHECK: extractvalue %dx.types.ResRet.f32 [[DATAF32]], 1 + ; CHECK: extractvalue %dx.types.ResRet.f32 [[DATAF32]], 2 + ; CHECK: extractvalue %dx.types.ResRet.f32 [[DATAF32]], 3 + ; CHECK: insertelement <4 x float> undef + ; CHECK: insertelement <4 x float> + ; CHECK: insertelement <4 x float> + ; CHECK: insertelement <4 x float> + ; CHECK: call void @v4f32_user(<4 x float> + call void @v4f32_user(<4 x float> %dataf32) + + ; CHECK: [[DATAF16:%.*]] = call %dx.types.ResRet.f16 @dx.op.rawBufferLoad.f16(i32 139, %dx.types.Handle %{{.*}}, i32 %index, i32 20, i8 7, i32 2) + %loadf16 = call {<3 x half>, i1} @llvm.dx.resource.load.rawbuffer.v3f16( + target("dx.RawBuffer", {i32, {<4 x float>, <3 x half>}}, 0, 0, 0) %buffer, + i32 %index, i32 20) + %dataf16 = extractvalue {<3 x half>, i1} %loadf16, 0 + + ; CHECK: extractvalue %dx.types.ResRet.f16 [[DATAF16]], 0 + ; CHECK: extractvalue %dx.types.ResRet.f16 [[DATAF16]], 1 + ; CHECK: extractvalue 
%dx.types.ResRet.f16 [[DATAF16]], 2 + ; CHECK: insertelement <3 x half> undef + ; CHECK: insertelement <3 x half> + ; CHECK: insertelement <3 x half> + ; CHECK: call void @v3f16_user(<3 x half> + call void @v3f16_user(<3 x half> %dataf16) + + ret void +} + +; byteaddressbuf.Load +; CHECK-LABEL: define void @loadv4f64_byte +define void @loadv4f64_byte(i32 %offset) { + %buffer = call target("dx.RawBuffer", i8, 0, 0, 0) + @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i8_0_0_0( + i32 0, i32 0, i32 1, i32 0, i1 false) + + ; CHECK: [[DATA:%.*]] = call %dx.types.ResRet.f64 @dx.op.rawBufferLoad.f64(i32 139, %dx.types.Handle %{{.*}}, i32 %offset, i32 0, i8 15, i32 8) + %load = call {<4 x double>, i1} @llvm.dx.resource.load.rawbuffer.v4i64( + target("dx.RawBuffer", i8, 0, 0, 0) %buffer, i32 %offset, i32 0) + %data = extractvalue {<4 x double>, i1} %load, 0 + + ; CHECK: extractvalue %dx.types.ResRet.f64 [[DATA]], 0 + ; CHECK: extractvalue %dx.types.ResRet.f64 [[DATA]], 1 + ; CHECK: extractvalue %dx.types.ResRet.f64 [[DATA]], 2 + ; CHECK: extractvalue %dx.types.ResRet.f64 [[DATA]], 3 + ; CHECK: insertelement <4 x double> undef + ; CHECK: insertelement <4 x double> + ; CHECK: insertelement <4 x double> + ; CHECK: insertelement <4 x double> + ; CHECK: call void @v4f64_user(<4 x double> + call void @v4f64_user(<4 x double> %data) + + ret void +} diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp index a0c93bed5ad8346..7488c8de5788517 100644 --- a/llvm/utils/TableGen/DXILEmitter.cpp +++ b/llvm/utils/TableGen/DXILEmitter.cpp @@ -218,8 +218,10 @@ static StringRef getOverloadKindStr(const Record *R) { .Case("Int64Ty", "OverloadKind::I64") .Case("ResRetHalfTy", "OverloadKind::HALF") .Case("ResRetFloatTy", "OverloadKind::FLOAT") + .Case("ResRetDoubleTy", "OverloadKind::DOUBLE") .Case("ResRetInt16Ty", "OverloadKind::I16") - .Case("ResRetInt32Ty", "OverloadKind::I32"); + .Case("ResRetInt32Ty", "OverloadKind::I32") + .Case("ResRetInt64Ty", 
"OverloadKind::I64"); } /// Return a string representation of valid overload information denoted