Skip to content

Commit 1cc8176

Browse files
authored
[CIR][Test] Add test for vec3 array memory access optimization (#685) (#1946)
Documents how ClangIR handles vec3 array element loads using the vec3->vec4 optimization for aligned memory access. The test shows the pointer cast to vec4 (visible in CIR only), the load as vec4, and the shuffle back to vec3 (visible in both CIR and LLVM IR). Closes #685.
1 parent 8f3c809 commit 1cc8176

File tree

1 file changed

+43
-0
lines changed

1 file changed

+43
-0
lines changed

clang/test/CIR/CodeGen/vectype-ext.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -563,3 +563,46 @@ void vector_shuffle_dynamic_mask_test() {
563563

564564
// LLVM: {{.*}} = and <6 x i32> {{.*}}, splat (i32 7)
565565
}
566+
567+
// Test for arrays of 3-component extended vectors
568+
// This documents how elements of a vec3 array are loaded from memory
569+
// The original (non-CIR) CodeGen treats vec3 as vec4 for aligned memory access;
// the checks below show ClangIR emitting the same widened load.
570+
// See issue #685
571+
// CIR: cir.func dso_local {{@.*test_vec3_array.*}}
572+
// LLVM: define dso_local void {{@.*test_vec3_array.*}}
573+
// Exercises loads of elements from an array of 3-element vectors (vi3).
// The FileCheck directives inside the body pin the expected CIR and LLVM IR,
// so their text and relative order are part of the test; the remaining
// comments are explanatory only.
void test_vec3_array() {
574+
vi3 arr[4] = {};
575+
// CIR: cir.alloca !cir.array<!cir.vector<!s32i x 3> x 4>, !cir.ptr<!cir.array<!cir.vector<!s32i x 3> x 4>>, ["arr"]
576+
// LLVM: alloca [4 x <3 x i32>], i64 1, align 16
577+
578+
// `ptr` is not referenced again in this function; presumably it exists only
// so that the address-of-element checks that follow have something to match.
vi3 *ptr = &arr[0];
579+
// CIR: cir.get_element{{.*}}!cir.array<!cir.vector<!s32i x 3> x 4>
580+
// LLVM: getelementptr [4 x <3 x i32>]
581+
582+
// Key behavior: each array element is loaded as a 4-element vector and then
// shuffled back down to 3 elements (lanes 0, 1 and 2).
583+
// The sum is discarded; the expression is here to produce one load of arr[0]
// and one load of arr[1] for the checks below.
arr[0] + arr[1];
584+
// CIR: %[[#PTR0:]] = cir.get_element{{.*}}!cir.ptr<!cir.vector<!s32i x 3>>
585+
// CIR-NEXT: %[[#PTR0_V4:]] = cir.cast bitcast %[[#PTR0]] : !cir.ptr<!cir.vector<!s32i x 3>> -> !cir.ptr<!cir.vector<!s32i x 4>>
586+
// CIR-NEXT: %[[#V4_0:]] = cir.load{{.*}}%[[#PTR0_V4]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
587+
// CIR-NEXT: %[[#POISON0:]] = cir.const #cir.poison : !cir.vector<!s32i x 4>
588+
// CIR-NEXT: %[[#V3_0:]] = cir.vec.shuffle(%[[#V4_0]], %[[#POISON0]] : !cir.vector<!s32i x 4>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i] : !cir.vector<!s32i x 3>
589+
590+
// LLVM: %[[#GEP0:]] = getelementptr [4 x <3 x i32>], ptr %{{.+}}, i32 0, i64 0
591+
// LLVM-NEXT: %[[#LOAD_V4_0:]] = load <4 x i32>, ptr %[[#GEP0]], align 16
592+
// LLVM-NEXT: %[[#LOAD_V3_0:]] = shufflevector <4 x i32> %[[#LOAD_V4_0]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
593+
594+
// The load of arr[1] follows the same widen-then-shuffle pattern as arr[0];
// the two narrowed values then feed the vector add.
595+
// CIR: %[[#PTR1:]] = cir.get_element{{.*}}!cir.ptr<!cir.vector<!s32i x 3>>
596+
// CIR-NEXT: %[[#PTR1_V4:]] = cir.cast bitcast %[[#PTR1]] : !cir.ptr<!cir.vector<!s32i x 3>> -> !cir.ptr<!cir.vector<!s32i x 4>>
597+
// CIR-NEXT: %[[#V4_1:]] = cir.load{{.*}}%[[#PTR1_V4]] : !cir.ptr<!cir.vector<!s32i x 4>>, !cir.vector<!s32i x 4>
598+
// CIR-NEXT: %[[#POISON1:]] = cir.const #cir.poison : !cir.vector<!s32i x 4>
599+
// CIR-NEXT: %[[#V3_1:]] = cir.vec.shuffle(%[[#V4_1]], %[[#POISON1]] : !cir.vector<!s32i x 4>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i] : !cir.vector<!s32i x 3>
600+
// CIR: cir.binop(add, %[[#V3_0]], %[[#V3_1]]) : !cir.vector<!s32i x 3>
601+
602+
// LLVM: %[[#GEP1:]] = getelementptr [4 x <3 x i32>], ptr %{{.+}}, i32 0, i64 1
603+
// LLVM-NEXT: %[[#LOAD_V4_1:]] = load <4 x i32>, ptr %[[#GEP1]], align 16
604+
// LLVM-NEXT: %[[#LOAD_V3_1:]] = shufflevector <4 x i32> %[[#LOAD_V4_1]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
605+
// LLVM: add <3 x i32> %[[#LOAD_V3_0]], %[[#LOAD_V3_1]]
606+
607+
// Note: Array element stores (arr[i] = value) are deliberately not exercised
// here because they are not yet implemented (NYI at CIRGenExpr.cpp:640 when
// this test was written; the line number may have moved since).
608+
}

0 commit comments

Comments
 (0)