Skip to content

Commit

Permalink
Adding e2e tests for i1 mask attentions (iree-org#19312)
Browse files Browse the repository at this point in the history
* The new tests exercise the `--iree-experimental-packed-i1-storage`
option, which stores i1 tensors bit-packed in memory (eight i1 elements
per byte) instead of one element per byte.
* Only certain shapes are correct at this moment as upstream patches for
emulating unaligned vector stores are not yet merged.

Signed-off-by: Alan Li <[email protected]>
  • Loading branch information
lialan authored Dec 5, 2024
1 parent df34911 commit 5dee2c8
Show file tree
Hide file tree
Showing 5 changed files with 277 additions and 0 deletions.
26 changes: 26 additions & 0 deletions tests/e2e/linalg_ext_ops/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ ALL_SRCS = enforce_glob(
"winograd_output.mlir",
],
include = ["*.mlir"],
exclude = [
"attention_i1_mask.mlir",
],
)

iree_check_single_backend_test_suite(
Expand All @@ -39,6 +42,24 @@ iree_check_single_backend_test_suite(
target_backend = "llvm-cpu",
)

iree_check_single_backend_test_suite(
name = "check_llvm-cpu_local-task_i1",
srcs = [
"attention_i1_mask.mlir",
],
compiler_flags = [
"--iree-llvmcpu-target-cpu=generic",
"--iree-experimental-packed-i1-storage",
],
driver = "local-task",
tags = [
# attention fails with a wasm target, just disable the tests there for now
# error: Yield operand #2 is not equivalent to the corresponding iter bbArg
"nowasm",
],
target_backend = "llvm-cpu",
)

VMVX_SRCS = enforce_glob(
# keep sorted
[
Expand All @@ -52,6 +73,7 @@ VMVX_SRCS = enforce_glob(
include = ["*.mlir"],
exclude = [
"attention.mlir",
"attention_i1_mask.mlir",
],
)

Expand All @@ -75,6 +97,7 @@ LLVM_GPU_SRCS = enforce_glob(
include = ["*.mlir"],
exclude = [
"attention.mlir",
"attention_i1_mask.mlir",
],
)

Expand Down Expand Up @@ -107,6 +130,7 @@ ROCM_HIP_SRCS = enforce_glob(
exclude = [
"top-k.mlir",
"attention.mlir",
"attention_i1_mask.mlir",
],
)

Expand All @@ -131,6 +155,7 @@ iree_check_single_backend_test_suite(
include = ["*.mlir"],
exclude = [
"attention.mlir",
"attention_i1_mask.mlir",
"top-k.mlir",
],
),
Expand All @@ -152,6 +177,7 @@ iree_check_single_backend_test_suite(
include = ["*.mlir"],
exclude = [
"attention.mlir",
"attention_i1_mask.mlir",
"top-k.mlir",
],
),
Expand Down
16 changes: 16 additions & 0 deletions tests/e2e/linalg_ext_ops/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,22 @@ iree_check_single_backend_test_suite(
"nowasm"
)

iree_check_single_backend_test_suite(
NAME
check_llvm-cpu_local-task_i1
SRCS
"attention_i1_mask.mlir"
TARGET_BACKEND
"llvm-cpu"
DRIVER
"local-task"
COMPILER_FLAGS
"--iree-llvmcpu-target-cpu=generic"
"--iree-experimental-packed-i1-storage"
LABELS
"nowasm"
)

iree_check_single_backend_test_suite(
NAME
check_vmvx_local-task
Expand Down
42 changes: 42 additions & 0 deletions tests/e2e/linalg_ext_ops/attention.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,48 @@ func.func @causal_attention1x3x4() {
return
}

func.func @attention1x4x4_i1_mask_all_ones() {
%init = tensor.empty() : tensor<1x4x4xf32>
%query = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

%key = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>
%value = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

%mask = util.unfoldable_constant dense<[[[true, true, true, true],
[true, true, true, true],
[true, true, true, true],
[true, true, true, true]]]> : tensor<1x4x4xi1>

%scale = arith.constant 0.5 : f32
%1 = iree_linalg_ext.attention {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d2)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d4)>,
affine_map<(d0, d1, d2, d3, d4) -> ()>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d3)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d4)>]}
ins(%query, %key, %value, %scale, %mask : tensor<1x4x4xf32>,
tensor<1x4x4xf32>, tensor<1x4x4xf32>, f32, tensor<1x4x4xi1>) outs(%init : tensor<1x4x4xf32>) {
^bb0(%arg0: f32):
iree_linalg_ext.yield %arg0 : f32
} -> tensor<1x4x4xf32>
check.expect_almost_eq_const(
%1,
dense<[[[0.798884, 0.898884, 0.998884, 1.09888],
[0.941939, 1.04194, 1.14194, 1.24194],
[1.05371, 1.15371, 1.25371, 1.35371],
[1.13295, 1.23295, 1.33295, 1.43295]]]> : tensor<1x4x4xf32>
) : tensor<1x4x4xf32>
return
}

func.func @softcap_attention1x3x4() {
%init = tensor.empty() : tensor<1x3x4xf32>
Expand Down
122 changes: 122 additions & 0 deletions tests/e2e/linalg_ext_ops/attention_i1_mask.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
func.func @attention1x4x4_i1_mask() {
%init = tensor.empty() : tensor<1x4x4xf32>
%query = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

%key = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>
%value = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

%i8mask = util.unfoldable_constant dense<[165, 165]> : tensor<2xi8>
%mask = flow.tensor.bitcast %i8mask : tensor<2xi8> -> tensor<1x4x4xi1>

%scale = arith.constant 0.5 : f32
%1 = iree_linalg_ext.attention {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d2)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d4)>,
affine_map<(d0, d1, d2, d3, d4) -> ()>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d3)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d4)>]}
ins(%query, %key, %value, %scale, %mask : tensor<1x4x4xf32>,
tensor<1x4x4xf32>, tensor<1x4x4xf32>, f32, tensor<1x4x4xi1>) outs(%init : tensor<1x4x4xf32>) {
^bb0(%arg0: f32):
iree_linalg_ext.yield %arg0 : f32
} -> tensor<1x4x4xf32>
check.expect_almost_eq_const(
%1,
dense<[[[0.57895, 0.67895, 0.77895, 0.87895],
[1.09108, 1.19108, 1.29108, 1.39108],
[0.774324, 0.874324, 0.974324, 1.07432],
[1.22842, 1.32842, 1.42842, 1.52842]]]> : tensor<1x4x4xf32>
) : tensor<1x4x4xf32>
return
}

func.func @attention1x4x4_i1_mask_all_ones() {
%init = tensor.empty() : tensor<1x4x4xf32>
%query = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

%key = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>
%value = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

%i8mask = util.unfoldable_constant dense<[255, 255]> : tensor<2xi8>
%mask = flow.tensor.bitcast %i8mask : tensor<2xi8> -> tensor<1x4x4xi1>

%scale = arith.constant 0.5 : f32
%1 = iree_linalg_ext.attention {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d2)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d4)>,
affine_map<(d0, d1, d2, d3, d4) -> ()>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d3)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d4)>]}
ins(%query, %key, %value, %scale, %mask : tensor<1x4x4xf32>,
tensor<1x4x4xf32>, tensor<1x4x4xf32>, f32, tensor<1x4x4xi1>) outs(%init : tensor<1x4x4xf32>) {
^bb0(%arg0: f32):
iree_linalg_ext.yield %arg0 : f32
} -> tensor<1x4x4xf32>
check.expect_almost_eq_const(
%1,
dense<[[[0.798884, 0.898884, 0.998884, 1.09888],
[0.941939, 1.04194, 1.14194, 1.24194],
[1.05371, 1.15371, 1.25371, 1.35371],
[1.13295, 1.23295, 1.33295, 1.43295]]]> : tensor<1x4x4xf32>
) : tensor<1x4x4xf32>
return
}

func.func @attention1x4x4_i1_mask_tril() {
%init = tensor.empty() : tensor<1x4x4xf32>
%query = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

%key = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>
%value = util.unfoldable_constant dense<[[[0.1, 0.2, 0.3, 0.4],
[0.5, 0.6, 0.7, 0.8],
[0.9, 1.0, 1.1, 1.2],
[1.3, 1.4, 1.5, 1.6]]]> : tensor<1x4x4xf32>

%i8mask = util.unfoldable_constant dense<[140, 239]> : tensor<2xi8>
%mask = flow.tensor.bitcast %i8mask : tensor<2xi8> -> tensor<1x4x4xi1>

%scale = arith.constant 0.5 : f32
%1 = iree_linalg_ext.attention {indexing_maps = [affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d2)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d4)>,
affine_map<(d0, d1, d2, d3, d4) -> ()>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d3)>,
affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d4)>]}
ins(%query, %key, %value, %scale, %mask : tensor<1x4x4xf32>,
tensor<1x4x4xf32>, tensor<1x4x4xf32>, f32, tensor<1x4x4xi1>) outs(%init : tensor<1x4x4xf32>) {
^bb0(%arg0: f32):
iree_linalg_ext.yield %arg0 : f32
} -> tensor<1x4x4xf32>
check.expect_almost_eq_const(
%1,
dense<[[[1.11993, 1.21993, 1.31993, 1.41993],
[1.3, 1.4, 1.5, 1.6],
[1.05371, 1.15371, 1.25371, 1.35371],
[1.15549, 1.25549, 1.35549, 1.45549]]]> : tensor<1x4x4xf32>
) : tensor<1x4x4xf32>
return
}
71 changes: 71 additions & 0 deletions tests/e2e/subbyte_types/subbyte_types.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,74 @@ func.func @i1_type_slice() {
check.expect_eq_const(%tensor_res, dense<[255]> : tensor<1xi8>) : tensor<1xi8>
return
}

func.func @i1_representation() {
%mask = util.unfoldable_constant dense<[140]> : tensor<1xi8>
%casted = flow.tensor.bitcast %mask : tensor<1xi8> -> tensor<2x4xi1>
%bar = util.optimization_barrier %casted : tensor<2x4xi1>
%tensor_res = flow.tensor.bitcast %bar : tensor<2x4xi1> -> tensor<1xi8>
check.expect_eq_const(%tensor_res, dense<[140]> : tensor<1xi8>) : tensor<1xi8>
return
}

func.func @i1_representation_2() {
%mask = util.unfoldable_constant dense<[140, 77]> : tensor<2xi8>
%casted = flow.tensor.bitcast %mask : tensor<2xi8> -> tensor<2x8xi1>
%bar = util.optimization_barrier %casted : tensor<2x8xi1>
%tensor_res = flow.tensor.bitcast %bar : tensor<2x8xi1> -> tensor<2xi8>
check.expect_eq_const(%tensor_res, dense<[140, 77]> : tensor<2xi8>) : tensor<2xi8>
return
}

func.func @i1_representation_3() {
%mask = util.unfoldable_constant dense<[140, 77]> : tensor<2xi8>
%casted = flow.tensor.bitcast %mask : tensor<2xi8> -> tensor<4x4xi1>
%bar = util.optimization_barrier %casted : tensor<4x4xi1>
%tensor_res = flow.tensor.bitcast %bar : tensor<4x4xi1> -> tensor<2xi8>
check.expect_eq_const(%tensor_res, dense<[140, 77]> : tensor<2xi8>) : tensor<2xi8>
return
}

func.func @truncate_i1() {
%mask = util.unfoldable_constant dense<[1, 1, 0, 0,
0, 0, 1, 1]> : tensor<8xi8>
%nm = tensor.empty() : tensor<8xi1>
%truncm = linalg.generic
{indexing_maps = [
affine_map<(d0) -> (d0)>,
affine_map<(d0) -> (d0)>],
iterator_types = ["parallel"]}
ins(%mask: tensor<8xi8>)
outs(%nm: tensor<8xi1>) {
^bb0(%in: i8, %out: i1):
%zero = arith.constant 0 : i8
%truncated = arith.cmpi "sgt", %in, %zero : i8
linalg.yield %truncated : i1
} -> tensor<8xi1>
%tensor_res = flow.tensor.bitcast %truncm : tensor<8xi1> -> tensor<1xi8>
check.expect_eq_const(%tensor_res, dense<[195]> : tensor<1xi8>) : tensor<1xi8>
return
}

func.func @truncate_i1_2() {
%mask = util.unfoldable_constant dense<[[0, 0, 1, 1],
[1, 1, 0, 0],
[1, 1, 0, 0],
[0, 0, 1, 1]]> : tensor<4x4xi8>
%nm = tensor.empty() : tensor<4x4xi1>
%truncm = linalg.generic
{indexing_maps = [
affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"]}
ins(%mask: tensor<4x4xi8>)
outs(%nm: tensor<4x4xi1>) {
^bb0(%in: i8, %out: i1):
%zero = arith.constant 0 : i8
%truncated = arith.cmpi "sgt", %in, %zero : i8
linalg.yield %truncated : i1
} -> tensor<4x4xi1>
%tensor_res = flow.tensor.bitcast %truncm : tensor<4x4xi1> -> tensor<2xi8>
check.expect_eq_const(%tensor_res, dense<[60, 195]> : tensor<2xi8>) : tensor<2xi8>
return
}

0 comments on commit 5dee2c8

Please sign in to comment.