diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/TestDependencyOffsets.java b/test/hotspot/jtreg/compiler/loopopts/superword/TestDependencyOffsets.java index 36015a03541..c0afe1d66a5 100644 --- a/test/hotspot/jtreg/compiler/loopopts/superword/TestDependencyOffsets.java +++ b/test/hotspot/jtreg/compiler/loopopts/superword/TestDependencyOffsets.java @@ -28,7 +28,7 @@ * Note: CompileCommand Option Vectorize is enabled. * * Note: this test is auto-generated. Please modify / generate with script: - * https://bugs.openjdk.org/browse/JDK-8310308 + * https://bugs.openjdk.org/browse/JDK-8312570 * * Types: int, long, short, char, byte, float, double * Offsets: 0, -1, 1, -2, 2, -3, 3, -4, 4, -7, 7, -8, 8, -14, 14, -16, 16, -18, 18, -20, 20, -31, 31, -32, 32, -63, 63, -64, 64, -65, 65, -128, 128, -129, 129, -192, 192 @@ -91,7 +91,7 @@ /* * @test id=vanilla-A - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @library /test/lib / @@ -100,7 +100,7 @@ /* * @test id=vanilla-U - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @library /test/lib / @@ -109,7 +109,7 @@ /* * @test id=sse4-v016-A - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -120,7 +120,7 @@ /* * @test id=sse4-v016-U - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -131,7 +131,7 @@ /* * @test id=sse4-v008-A - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -142,7 +142,7 @@ /* * @test id=sse4-v008-U - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -153,7 +153,7 @@ /* * @test id=sse4-v004-A - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -164,7 +164,7 @@ /* * @test id=sse4-v004-U - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -175,7 +175,7 @@ /* * @test id=sse4-v002-A - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -186,7 +186,7 @@ /* * @test id=sse4-v002-U - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -197,7 +197,7 @@ /* * @test id=avx1-v032-A - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -208,7 +208,7 @@ /* * @test id=avx1-v032-U - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -219,7 +219,7 @@ /* * @test id=avx1-v016-A - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -230,7 +230,7 @@ /* * @test id=avx1-v016-U - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -241,7 +241,7 @@ /* * @test id=avx2-v032-A - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -252,7 +252,7 @@ /* * @test id=avx2-v032-U - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -263,7 +263,7 @@ /* * @test id=avx2-v016-A - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -274,7 +274,7 @@ /* * @test id=avx2-v016-U - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -285,7 +285,7 @@ /* * @test id=avx512-v064-A - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -296,7 +296,7 @@ /* * @test id=avx512-v064-U - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -307,7 +307,7 @@ /* * @test id=avx512-v032-A - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -318,7 +318,7 @@ /* * @test id=avx512-v032-U - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -329,7 +329,7 @@ /* * @test id=avx512bw-v064-A - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -340,7 +340,7 @@ /* * @test id=avx512bw-v064-U - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -351,7 +351,7 @@ /* * @test id=avx512bw-v032-A - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -362,7 +362,7 @@ /* * @test id=avx512bw-v032-U - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64") @@ -373,7 +373,7 @@ /* * @test id=vec-v064-A - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64") @@ -383,7 +383,7 @@ /* * @test id=vec-v064-U - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64") @@ -393,7 +393,7 @@ /* * @test id=vec-v032-A - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64") @@ -403,7 +403,7 @@ /* * @test id=vec-v032-U - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64") @@ -413,7 +413,7 @@ /* * @test id=vec-v016-A - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64") @@ -423,7 +423,7 @@ /* * @test id=vec-v016-U - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64") @@ -433,7 +433,7 @@ /* * @test id=vec-v008-A - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64") @@ -443,7 +443,7 @@ /* * @test id=vec-v008-U - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64") @@ -453,7 +453,7 @@ /* * @test id=vec-v004-A - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64") @@ -463,7 +463,7 @@ /* * @test id=vec-v004-U - * @bug 8298935 + * @bug 8298935 8310308 8312570 * @summary Test SuperWord: vector size, offsets, dependencies, alignment. * @requires vm.compiler2.enabled * @requires (os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64") @@ -1402,10 +1402,14 @@ public static void main(String args[]) { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"sve", "true"}) public static void testIntP0(int[] data) { for (int j = 0; j < RANGE; j++) { data[j + 0] = (int)(data[j] * (int)-11); @@ -1446,14 +1450,22 @@ public static void runIntP0() { @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntM1(int[] data) { for (int j = 1; j < RANGE; j++) { data[j + -1] = (int)(data[j] * (int)-11); @@ -1479,7 +1491,10 @@ public static void runIntM1() { // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 // positive byte_offset 4 can lead to cyclic dependency // No positive IR rule: conditions impossible. - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + // positive byte_offset 4 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 // positive byte_offset 4 can lead to cyclic dependency // No positive IR rule: conditions impossible. public static void testIntP1(int[] data) { @@ -1510,10 +1525,14 @@ public static void runIntP1() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntM2(int[] data) { for (int j = 2; j < RANGE; j++) { data[j + -2] = (int)(data[j] * (int)-11); @@ -1545,11 +1564,16 @@ public static void runIntM2() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + // positive byte_offset 8 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 // positive byte_offset 8 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntP2(int[] data) { for (int j = 0; j < RANGE - 2; j++) { data[j + 2] = (int)(data[j] * (int)-11); @@ -1590,14 +1614,22 @@ public static void runIntP2() { @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntM3(int[] data) { for (int j = 3; j < RANGE; j++) { data[j + -3] = (int)(data[j] * (int)-11); @@ -1629,11 +1661,16 @@ public static void runIntM3() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 12"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + // positive byte_offset 12 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 12"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 // positive byte_offset 12 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 12"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntP3(int[] data) { for (int j = 0; j < RANGE - 3; j++) { data[j + 3] = (int)(data[j] * (int)-11); @@ -1662,10 +1699,14 @@ public static void runIntP3() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntM4(int[] data) { for (int j = 4; j < RANGE; j++) { data[j + -4] = (int)(data[j] * (int)-11); @@ -1700,11 +1741,19 @@ public static void runIntM4() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 // positive byte_offset 16 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntP4(int[] data) { for (int j = 0; j < RANGE - 4; j++) { data[j + 4] = (int)(data[j] * (int)-11); @@ -1745,14 +1794,22 @@ public static void runIntP4() { @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"sve", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"sve", "true"}) public static void testIntM7(int[] data) { for (int j = 7; j < RANGE; j++) { data[j + -7] = (int)(data[j] * (int)-11); @@ -1783,11 +1840,15 @@ public static void runIntM7() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 28"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 // positive byte_offset 28 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 28"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntP7(int[] data) { for (int j = 0; j < RANGE - 7; j++) { data[j + 7] = (int)(data[j] * (int)-11); @@ -1816,10 +1877,14 @@ public static void runIntP7() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"sve", "true"}) public static void testIntM8(int[] data) { for (int j = 8; j < RANGE; j++) { data[j + -8] = (int)(data[j] * (int)-11); @@ -1857,14 +1922,19 @@ public static void runIntM8() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 32"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + // positive byte_offset 32 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 32"}, + applyIfCPUFeature = {"sve", "true"}) public static void testIntP8(int[] data) { for (int j = 0; j < RANGE - 8; j++) { data[j + 8] = (int)(data[j] * (int)-11); @@ -1893,10 +1963,14 @@ public static void runIntP8() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntM14(int[] data) { for (int j = 14; j < RANGE; j++) { data[j + -14] = (int)(data[j] * (int)-11); @@ -1926,10 +2000,15 @@ public static void runIntM14() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 56"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + // positive byte_offset 56 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 56"}, + applyIfCPUFeature = {"sve", "true"}) public static void testIntP14(int[] data) { for (int j = 0; j < RANGE - 14; j++) { data[j + 14] = (int)(data[j] * (int)-11); @@ -1958,10 +2037,14 @@ public static void runIntP14() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntM16(int[] data) { for (int j = 16; j < RANGE; j++) { data[j + -16] = (int)(data[j] * (int)-11); @@ -2002,14 +2085,19 @@ public static void runIntM16() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + // positive byte_offset 64 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 64"}, + applyIfCPUFeature = {"sve", "true"}) public static void testIntP16(int[] data) { for (int j = 0; j < RANGE - 16; j++) { data[j + 16] = (int)(data[j] * (int)-11); @@ -2038,10 +2126,14 @@ public static void runIntP16() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntM18(int[] data) { for (int j = 18; j < RANGE; j++) { data[j + -18] = (int)(data[j] * (int)-11); @@ -2070,10 +2162,15 @@ public static void runIntM18() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + // positive byte_offset 72 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 72"}, + applyIfCPUFeature = {"sve", "true"}) public static void testIntP18(int[] data) { for (int j = 0; j < RANGE - 18; j++) { data[j + 18] = (int)(data[j] * (int)-11); @@ -2102,10 +2199,14 @@ public static void runIntP18() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"sve", "true"}) public static void testIntM20(int[] data) { for (int j = 20; j < RANGE; j++) { data[j + -20] = (int)(data[j] * (int)-11); @@ -2138,10 +2239,19 @@ public static void runIntM20() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + // positive byte_offset 80 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 80"}, + applyIfCPUFeature = {"sve", "true"}) public static void testIntP20(int[] data) { for (int j = 0; j < RANGE - 20; j++) { data[j + 20] = (int)(data[j] * (int)-11); @@ -2182,14 +2292,22 @@ public static void runIntP20() { @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntM31(int[] data) { for (int j = 31; j < RANGE; j++) { data[j + -31] = (int)(data[j] * (int)-11); @@ -2218,10 +2336,15 @@ public static void runIntM31() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + // positive byte_offset 124 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 124"}, + applyIfCPUFeature = {"sve", "true"}) public static void testIntP31(int[] data) { for (int j = 0; j < RANGE - 31; j++) { data[j + 31] = (int)(data[j] * (int)-11); @@ -2250,10 +2373,14 @@ public static void runIntP31() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntM32(int[] data) { for (int j = 32; j < RANGE; j++) { data[j + -32] = (int)(data[j] * (int)-11); @@ -2294,14 +2421,19 @@ public static void runIntM32() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + // positive byte_offset 128 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 128"}, + applyIfCPUFeature = {"sve", "true"}) public static void testIntP32(int[] data) { for (int j = 0; j < RANGE - 32; j++) { data[j + 32] = (int)(data[j] * (int)-11); @@ -2342,14 +2474,22 @@ public static void runIntP32() { @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntM63(int[] data) { for (int j = 63; j < RANGE; j++) { data[j + -63] = (int)(data[j] * (int)-11); @@ -2378,10 +2518,15 @@ public static void runIntM63() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + // positive byte_offset 252 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 252"}, + applyIfCPUFeature = {"sve", "true"}) public static void testIntP63(int[] data) { for (int j = 0; j < RANGE - 63; j++) { data[j + 63] = (int)(data[j] * (int)-11); @@ -2410,10 +2555,14 @@ public static void runIntP63() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntM64(int[] data) { for (int j = 64; j < RANGE; j++) { data[j + -64] = (int)(data[j] * (int)-11); @@ -2454,14 +2603,22 @@ public static void runIntM64() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntP64(int[] data) { for (int j = 0; j < RANGE - 64; j++) { data[j + 64] = (int)(data[j] * (int)-11); @@ -2502,14 +2659,22 @@ public static void runIntP64() { @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntM65(int[] data) { for (int j = 65; j < RANGE; j++) { data[j + -65] = (int)(data[j] * (int)-11); @@ -2538,10 +2703,14 @@ public static void runIntM65() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntP65(int[] data) { for (int j = 0; j < RANGE - 65; j++) { data[j + 65] = (int)(data[j] * (int)-11); @@ -2570,10 +2739,14 @@ public static void runIntP65() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"sve", "true"}) public static void testIntM128(int[] data) { for (int j = 128; j < RANGE; j++) { data[j + -128] = (int)(data[j] * (int)-11); @@ -2614,14 +2787,22 @@ public static void runIntM128() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntP128(int[] data) { for (int j = 0; j < RANGE - 128; j++) { data[j + 128] = (int)(data[j] * (int)-11); @@ -2662,14 +2843,22 @@ public static void runIntP128() { @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"sve", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_I, IRNode.MUL_VI, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"sve", "true"}) public static void testIntM129(int[] data) { for (int j = 129; j < RANGE; j++) { data[j + -129] = (int)(data[j] * (int)-11); @@ -2698,10 +2887,14 @@ public static void runIntM129() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"sve", "true"}) public static void testIntP129(int[] data) { for (int j = 0; j < RANGE - 129; j++) { data[j + 129] = (int)(data[j] * (int)-11); @@ -2730,10 +2923,14 @@ public static void runIntP129() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testIntM192(int[] data) { for (int j = 192; j < RANGE; j++) { data[j + -192] = (int)(data[j] * (int)-11); @@ -2774,14 +2971,22 @@ public static void runIntM192() { @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"sve", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_I, "> 0", IRNode.MUL_VI, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"sve", "true"}) public static void testIntP192(int[] data) { for (int j = 0; j < RANGE - 192; j++) { data[j + 192] = (int)(data[j] * (int)-11); @@ -2810,10 +3015,14 @@ public static void runIntP192() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongP0(long[] data) { for (int j = 0; j < RANGE; j++) { data[j + 0] = (long)(data[j] + (long)-11); @@ -2854,14 +3063,22 @@ public static void runLongP0() { @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"sve", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongM1(long[] data) { for (int j = 1; j < RANGE; j++) { data[j + -1] = (long)(data[j] + (long)-11); @@ -2887,7 +3104,10 @@ public static void runLongM1() { // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 // positive byte_offset 8 can lead to cyclic dependency // No positive IR rule: conditions impossible. - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + // positive byte_offset 8 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 // positive byte_offset 8 can lead to cyclic dependency // No positive IR rule: conditions impossible. public static void testLongP1(long[] data) { @@ -2918,10 +3138,14 @@ public static void runLongP1() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongM2(long[] data) { for (int j = 2; j < RANGE; j++) { data[j + -2] = (long)(data[j] + (long)-11); @@ -2956,11 +3180,19 @@ public static void runLongM2() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 // positive byte_offset 16 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongP2(long[] data) { for (int j = 0; j < RANGE - 2; j++) { data[j + 2] = (long)(data[j] + (long)-11); @@ -3001,14 +3233,22 @@ public static void runLongP2() { @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"sve", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongM3(long[] data) { for (int j = 3; j < RANGE; j++) { data[j + -3] = (long)(data[j] + (long)-11); @@ -3039,11 +3279,15 @@ public static void runLongM3() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 24"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 // positive byte_offset 24 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 24"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongP3(long[] data) { for (int j = 0; j < RANGE - 3; j++) { data[j + 3] = (long)(data[j] + (long)-11); @@ -3072,10 +3316,14 @@ public static void runLongP3() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongM4(long[] data) { for (int j = 4; j < RANGE; j++) { data[j + -4] = (long)(data[j] + (long)-11); @@ -3113,14 +3361,19 @@ public static void runLongM4() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 32"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + // positive byte_offset 32 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 32"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongP4(long[] data) { for (int j = 0; j < RANGE - 4; j++) { data[j + 4] = (long)(data[j] + (long)-11); @@ -3161,14 +3414,22 @@ public static void runLongP4() { @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongM7(long[] data) { for (int j = 7; j < RANGE; j++) { data[j + -7] = (long)(data[j] + (long)-11); @@ -3198,10 +3459,15 @@ public static void runLongM7() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 56"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + // positive byte_offset 56 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 56"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongP7(long[] data) { for (int j = 0; j < RANGE - 7; j++) { data[j + 7] = (long)(data[j] + (long)-11); @@ -3230,10 +3496,14 @@ public static void runLongP7() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongM8(long[] data) { for (int j = 8; j < RANGE; j++) { data[j + -8] = (long)(data[j] + (long)-11); @@ -3274,14 +3544,19 @@ public static void runLongM8() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + // positive byte_offset 64 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 64"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongP8(long[] data) { for (int j = 0; j < RANGE - 8; j++) { data[j + 8] = (long)(data[j] + (long)-11); @@ -3310,10 +3585,14 @@ public static void runLongP8() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongM14(long[] data) { for (int j = 14; j < RANGE; j++) { data[j + -14] = (long)(data[j] + (long)-11); @@ -3346,10 +3625,19 @@ public static void runLongM14() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + // positive byte_offset 112 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 112"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongP14(long[] data) { for (int j = 0; j < RANGE - 14; j++) { data[j + 14] = (long)(data[j] + (long)-11); @@ -3378,10 +3666,14 @@ public static void runLongP14() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongM16(long[] data) { for (int j = 16; j < RANGE; j++) { data[j + -16] = (long)(data[j] + (long)-11); @@ -3422,14 +3714,19 @@ public static void runLongM16() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + // positive byte_offset 128 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 128"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongP16(long[] data) { for (int j = 0; j < RANGE - 16; j++) { data[j + 16] = (long)(data[j] + (long)-11); @@ -3458,10 +3755,14 @@ public static void runLongP16() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongM18(long[] data) { for (int j = 18; j < RANGE; j++) { data[j + -18] = (long)(data[j] + (long)-11); @@ -3494,10 +3795,19 @@ public static void runLongM18() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + // positive byte_offset 144 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 144"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongP18(long[] data) { for (int j = 0; j < RANGE - 18; j++) { data[j + 18] = (long)(data[j] + (long)-11); @@ -3526,10 +3836,14 @@ public static void runLongP18() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongM20(long[] data) { for (int j = 20; j < RANGE; j++) { data[j + -20] = (long)(data[j] + (long)-11); @@ -3566,14 +3880,19 @@ public static void runLongM20() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + // positive byte_offset 160 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 160"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongP20(long[] data) { for (int j = 0; j < RANGE - 20; j++) { data[j + 20] = (long)(data[j] + (long)-11); @@ -3614,14 +3933,22 @@ public static void runLongP20() { @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongM31(long[] data) { for (int j = 31; j < RANGE; j++) { data[j + -31] = (long)(data[j] + (long)-11); @@ -3650,10 +3977,15 @@ public static void runLongM31() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + // positive byte_offset 248 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 248"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongP31(long[] data) { for (int j = 0; j < RANGE - 31; j++) { data[j + 31] = (long)(data[j] + (long)-11); @@ -3682,10 +4014,14 @@ public static void runLongP31() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongM32(long[] data) { for (int j = 32; j < RANGE; j++) { data[j + -32] = (long)(data[j] + (long)-11); @@ -3726,14 +4062,22 @@ public static void runLongM32() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"sve", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongP32(long[] data) { for (int j = 0; j < RANGE - 32; j++) { data[j + 32] = (long)(data[j] + (long)-11); @@ -3774,14 +4118,22 @@ public static void runLongP32() { @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"sve", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongM63(long[] data) { for (int j = 63; j < RANGE; j++) { data[j + -63] = (long)(data[j] + (long)-11); @@ -3810,10 +4162,14 @@ public static void runLongM63() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongP63(long[] data) { for (int j = 0; j < RANGE - 63; j++) { data[j + 63] = (long)(data[j] + (long)-11); @@ -3842,10 +4198,14 @@ public static void runLongP63() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongM64(long[] data) { for (int j = 64; j < RANGE; j++) { data[j + -64] = (long)(data[j] + (long)-11); @@ -3886,14 +4246,22 @@ public static void runLongM64() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"sve", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongP64(long[] data) { for (int j = 0; j < RANGE - 64; j++) { data[j + 64] = (long)(data[j] + (long)-11); @@ -3934,14 +4302,22 @@ public static void runLongP64() { @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongM65(long[] data) { for (int j = 65; j < RANGE; j++) { data[j + -65] = (long)(data[j] + (long)-11); @@ -3970,10 +4346,14 @@ public static void runLongM65() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongP65(long[] data) { for (int j = 0; j < RANGE - 65; j++) { data[j + 65] = (long)(data[j] + (long)-11); @@ -4002,10 +4382,14 @@ public static void runLongP65() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongM128(long[] data) { for (int j = 128; j < RANGE; j++) { data[j + -128] = (long)(data[j] + (long)-11); @@ -4046,14 +4430,22 @@ public static void runLongM128() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"sve", "true"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongP128(long[] data) { for (int j = 0; j < RANGE - 128; j++) { data[j + 128] = (long)(data[j] + (long)-11); @@ -4094,14 +4486,22 @@ public static void runLongP128() { @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_L, IRNode.ADD_VL, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongM129(long[] data) { for (int j = 129; j < RANGE; j++) { data[j + -129] = (long)(data[j] + (long)-11); @@ -4130,10 +4530,14 @@ public static void runLongM129() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongP129(long[] data) { for (int j = 0; j < RANGE - 129; j++) { data[j + 129] = (long)(data[j] + (long)-11); @@ -4162,10 +4566,14 @@ public static void runLongP129() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"sve", "true"}) public static void testLongM192(long[] data) { for (int j = 192; j < RANGE; j++) { data[j + -192] = (long)(data[j] + (long)-11); @@ -4206,14 +4614,22 @@ public static void runLongM192() { @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_L, "> 0", IRNode.ADD_VL, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testLongP192(long[] data) { for (int j = 0; j < RANGE - 192; j++) { data[j + 192] = (long)(data[j] + (long)-11); @@ -4242,10 +4658,14 @@ public static void runLongP192() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortP0(short[] data) { for (int j = 0; j < RANGE; j++) { data[j + 0] = (short)(data[j] * (short)-11); @@ -4286,14 +4706,22 @@ public static void runShortP0() { @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortM1(short[] data) { for (int j = 1; j < RANGE; j++) { data[j + -1] = (short)(data[j] * (short)-11); @@ -4319,7 +4747,10 @@ public static void runShortM1() { // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 // positive byte_offset 2 can lead to cyclic dependency // No positive IR rule: conditions impossible. - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + // positive byte_offset 2 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 // positive byte_offset 2 can lead to cyclic dependency // No positive IR rule: conditions impossible. public static void testShortP1(short[] data) { @@ -4350,10 +4781,14 @@ public static void runShortP1() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortM2(short[] data) { for (int j = 2; j < RANGE; j++) { data[j + -2] = (short)(data[j] * (short)-11); @@ -4385,11 +4820,16 @@ public static void runShortM2() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + // positive byte_offset 4 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 // positive byte_offset 4 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortP2(short[] data) { for (int j = 0; j < RANGE - 2; j++) { data[j + 2] = (short)(data[j] * (short)-11); @@ -4430,14 +4870,22 @@ public static void runShortP2() { @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortM3(short[] data) { for (int j = 3; j < RANGE; j++) { data[j + -3] = (short)(data[j] * (short)-11); @@ -4469,11 +4917,16 @@ public static void runShortM3() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 6"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + // positive byte_offset 6 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 6"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 // positive byte_offset 6 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 6"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortP3(short[] data) { for (int j = 0; j < RANGE - 3; j++) { data[j + 3] = (short)(data[j] * (short)-11); @@ -4502,10 +4955,14 @@ public static void runShortP3() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortM4(short[] data) { for (int j = 4; j < RANGE; j++) { data[j + -4] = (short)(data[j] * (short)-11); @@ -4537,11 +4994,16 @@ public static void runShortM4() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 8"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 // positive byte_offset 8 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 8 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 8"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortP4(short[] data) { for (int j = 0; j < RANGE - 4; j++) { data[j + 4] = (short)(data[j] * (short)-11); @@ -4582,14 +5044,22 @@ public static void runShortP4() { @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortM7(short[] data) { for (int j = 7; j < RANGE; j++) { data[j + -7] = (short)(data[j] * (short)-11); @@ -4621,11 +5091,16 @@ public static void runShortM7() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 14"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + // positive byte_offset 14 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 14"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 // positive byte_offset 14 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 14"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortP7(short[] data) { for (int j = 0; j < RANGE - 7; j++) { data[j + 7] = (short)(data[j] * (short)-11); @@ -4654,10 +5129,14 @@ public static void runShortP7() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortM8(short[] data) { for (int j = 8; j < RANGE; j++) { data[j + -8] = (short)(data[j] * (short)-11); @@ -4692,11 +5171,19 @@ public static void runShortM8() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 16"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 // positive byte_offset 16 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortP8(short[] data) { for (int j = 0; j < RANGE - 8; j++) { data[j + 8] = (short)(data[j] * (short)-11); @@ -4725,10 +5212,14 @@ public static void runShortP8() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortM14(short[] data) { for (int j = 14; j < RANGE; j++) { data[j + -14] = (short)(data[j] * (short)-11); @@ -4759,11 +5250,15 @@ public static void runShortM14() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 28"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 // positive byte_offset 28 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 28"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortP14(short[] data) { for (int j = 0; j < RANGE - 14; j++) { data[j + 14] = (short)(data[j] * (short)-11); @@ -4792,10 +5287,14 @@ public static void runShortP14() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortM16(short[] data) { for (int j = 16; j < RANGE; j++) { data[j + -16] = (short)(data[j] * (short)-11); @@ -4833,14 +5332,19 @@ public static void runShortM16() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 32"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 32 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 32"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortP16(short[] data) { for (int j = 0; j < RANGE - 16; j++) { data[j + 16] = (short)(data[j] * (short)-11); @@ -4869,10 +5373,14 @@ public static void runShortP16() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortM18(short[] data) { for (int j = 18; j < RANGE; j++) { data[j + -18] = (short)(data[j] * (short)-11); @@ -4902,10 +5410,15 @@ public static void runShortM18() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 36"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 36 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 36"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortP18(short[] data) { for (int j = 0; j < RANGE - 18; j++) { data[j + 18] = (short)(data[j] * (short)-11); @@ -4934,10 +5447,14 @@ public static void runShortP18() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortM20(short[] data) { for (int j = 20; j < RANGE; j++) { data[j + -20] = (short)(data[j] * (short)-11); @@ -4967,10 +5484,15 @@ public static void runShortM20() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 40"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 40 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 40"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortP20(short[] data) { for (int j = 0; j < RANGE - 20; j++) { data[j + 20] = (short)(data[j] * (short)-11); @@ -5011,14 +5533,22 @@ public static void runShortP20() { @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortM31(short[] data) { for (int j = 31; j < RANGE; j++) { data[j + -31] = (short)(data[j] * (short)-11); @@ -5048,10 +5578,15 @@ public static void runShortM31() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 62"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 62 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 62"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortP31(short[] data) { for (int j = 0; j < RANGE - 31; j++) { data[j + 31] = (short)(data[j] * (short)-11); @@ -5080,10 +5615,14 @@ public static void runShortP31() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortM32(short[] data) { for (int j = 32; j < RANGE; j++) { data[j + -32] = (short)(data[j] * (short)-11); @@ -5124,14 +5663,19 @@ public static void runShortM32() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 64 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 64"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortP32(short[] data) { for (int j = 0; j < RANGE - 32; j++) { data[j + 32] = (short)(data[j] * (short)-11); @@ -5172,14 +5716,22 @@ public static void runShortP32() { @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortM63(short[] data) { for (int j = 63; j < RANGE; j++) { data[j + -63] = (short)(data[j] * (short)-11); @@ -5208,10 +5760,15 @@ public static void runShortM63() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 126 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 126"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortP63(short[] data) { for (int j = 0; j < RANGE - 63; j++) { data[j + 63] = (short)(data[j] * (short)-11); @@ -5240,10 +5797,14 @@ public static void runShortP63() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortM64(short[] data) { for (int j = 64; j < RANGE; j++) { data[j + -64] = (short)(data[j] * (short)-11); @@ -5284,14 +5845,19 @@ public static void runShortM64() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 128 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 128"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortP64(short[] data) { for (int j = 0; j < RANGE - 64; j++) { data[j + 64] = (short)(data[j] * (short)-11); @@ -5332,14 +5898,22 @@ public static void runShortP64() { @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortM65(short[] data) { for (int j = 65; j < RANGE; j++) { data[j + -65] = (short)(data[j] * (short)-11); @@ -5368,10 +5942,15 @@ public static void runShortM65() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 130 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 130"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortP65(short[] data) { for (int j = 0; j < RANGE - 65; j++) { data[j + 65] = (short)(data[j] * (short)-11); @@ -5400,10 +5979,14 @@ public static void runShortP65() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortM128(short[] data) { for (int j = 128; j < RANGE; j++) { data[j + -128] = (short)(data[j] * (short)-11); @@ -5444,14 +6027,22 @@ public static void runShortM128() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortP128(short[] data) { for (int j = 0; j < RANGE - 128; j++) { data[j + 128] = (short)(data[j] * (short)-11); @@ -5492,14 +6083,22 @@ public static void runShortP128() { @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_S, IRNode.MUL_VS, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortM129(short[] data) { for (int j = 129; j < RANGE; j++) { data[j + -129] = (short)(data[j] * (short)-11); @@ -5528,10 +6127,14 @@ public static void runShortM129() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortP129(short[] data) { for (int j = 0; j < RANGE - 129; j++) { data[j + 129] = (short)(data[j] * (short)-11); @@ -5560,10 +6163,14 @@ public static void runShortP129() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testShortM192(short[] data) { for (int j = 192; j < RANGE; j++) { data[j + -192] = (short)(data[j] * (short)-11); @@ -5604,14 +6211,18 @@ public static void runShortM192() { @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + @IR(counts = {IRNode.LOAD_VECTOR_S, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) public static void testShortP192(short[] data) { for (int j = 0; j < RANGE - 192; j++) { data[j + 192] = (short)(data[j] * (short)-11); @@ -5640,10 +6251,14 @@ public static void runShortP192() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharP0(char[] data) { for (int j = 0; j < RANGE; j++) { data[j + 0] = (char)(data[j] * (char)-11); @@ -5684,14 +6299,22 @@ public static void runCharP0() { @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharM1(char[] data) { for (int j = 1; j < RANGE; j++) { data[j + -1] = (char)(data[j] * (char)-11); @@ -5717,7 +6340,10 @@ public static void runCharM1() { // CPU: avx512bw -> vector_width: 64 -> elements in vector: 32 // positive byte_offset 2 can lead to cyclic dependency // No positive IR rule: conditions impossible. - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + // positive byte_offset 2 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 // positive byte_offset 2 can lead to cyclic dependency // No positive IR rule: conditions impossible. public static void testCharP1(char[] data) { @@ -5748,10 +6374,14 @@ public static void runCharP1() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) public static void testCharM2(char[] data) { for (int j = 2; j < RANGE; j++) { data[j + -2] = (char)(data[j] * (char)-11); @@ -5783,11 +6413,16 @@ public static void runCharM2() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + // positive byte_offset 4 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 // positive byte_offset 4 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharP2(char[] data) { for (int j = 0; j < RANGE - 2; j++) { data[j + 2] = (char)(data[j] * (char)-11); @@ -5828,14 +6463,22 @@ public static void runCharP2() { @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharM3(char[] data) { for (int j = 3; j < RANGE; j++) { data[j + -3] = (char)(data[j] * (char)-11); @@ -5867,11 +6510,16 @@ public static void runCharM3() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 6"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 // positive byte_offset 6 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 6"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 6 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 6"}, + applyIfCPUFeature = {"sve", "true"}) public static void testCharP3(char[] data) { for (int j = 0; j < RANGE - 3; j++) { data[j + 3] = (char)(data[j] * (char)-11); @@ -5900,10 +6548,14 @@ public static void runCharP3() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharM4(char[] data) { for (int j = 4; j < RANGE; j++) { data[j + -4] = (char)(data[j] * (char)-11); @@ -5935,11 +6587,16 @@ public static void runCharM4() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 8"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + // positive byte_offset 8 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 // positive byte_offset 8 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharP4(char[] data) { for (int j = 0; j < RANGE - 4; j++) { data[j + 4] = (char)(data[j] * (char)-11); @@ -5980,14 +6637,22 @@ public static void runCharP4() { @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharM7(char[] data) { for (int j = 7; j < RANGE; j++) { data[j + -7] = (char)(data[j] * (char)-11); @@ -6019,11 +6684,16 @@ public static void runCharM7() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 14"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + // positive byte_offset 14 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 14"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 // positive byte_offset 14 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 14"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharP7(char[] data) { for (int j = 0; j < RANGE - 7; j++) { data[j + 7] = (char)(data[j] * (char)-11); @@ -6052,10 +6722,14 @@ public static void runCharP7() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharM8(char[] data) { for (int j = 8; j < RANGE; j++) { data[j + -8] = (char)(data[j] * (char)-11); @@ -6090,11 +6764,19 @@ public static void runCharM8() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 16"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 // positive byte_offset 16 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharP8(char[] data) { for (int j = 0; j < RANGE - 8; j++) { data[j + 8] = (char)(data[j] * (char)-11); @@ -6123,10 +6805,14 @@ public static void runCharP8() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharM14(char[] data) { for (int j = 14; j < RANGE; j++) { data[j + -14] = (char)(data[j] * (char)-11); @@ -6157,11 +6843,15 @@ public static void runCharM14() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 28"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 // positive byte_offset 28 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 28"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharP14(char[] data) { for (int j = 0; j < RANGE - 14; j++) { data[j + 14] = (char)(data[j] * (char)-11); @@ -6190,10 +6880,14 @@ public static void runCharP14() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharM16(char[] data) { for (int j = 16; j < RANGE; j++) { data[j + -16] = (char)(data[j] * (char)-11); @@ -6231,14 +6925,19 @@ public static void runCharM16() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 32"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 32 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 32"}, + applyIfCPUFeature = {"sve", "true"}) public static void testCharP16(char[] data) { for (int j = 0; j < RANGE - 16; j++) { data[j + 16] = (char)(data[j] * (char)-11); @@ -6267,10 +6966,14 @@ public static void runCharP16() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) public static void testCharM18(char[] data) { for (int j = 18; j < RANGE; j++) { data[j + -18] = (char)(data[j] * (char)-11); @@ -6300,10 +7003,15 @@ public static void runCharM18() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 36"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 36 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 36"}, + applyIfCPUFeature = {"sve", "true"}) public static void testCharP18(char[] data) { for (int j = 0; j < RANGE - 18; j++) { data[j + 18] = (char)(data[j] * (char)-11); @@ -6332,10 +7040,14 @@ public static void runCharP18() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) public static void testCharM20(char[] data) { for (int j = 20; j < RANGE; j++) { data[j + -20] = (char)(data[j] * (char)-11); @@ -6365,10 +7077,15 @@ public static void runCharM20() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 40"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 40 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 40"}, + applyIfCPUFeature = {"sve", "true"}) public static void testCharP20(char[] data) { for (int j = 0; j < RANGE - 20; j++) { data[j + 20] = (char)(data[j] * (char)-11); @@ -6409,14 +7126,22 @@ public static void runCharP20() { @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharM31(char[] data) { for (int j = 31; j < RANGE; j++) { data[j + -31] = (char)(data[j] * (char)-11); @@ -6446,10 +7171,15 @@ public static void runCharM31() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 62"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 62 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 62"}, + applyIfCPUFeature = {"sve", "true"}) public static void testCharP31(char[] data) { for (int j = 0; j < RANGE - 31; j++) { data[j + 31] = (char)(data[j] * (char)-11); @@ -6478,10 +7208,14 @@ public static void runCharP31() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharM32(char[] data) { for (int j = 32; j < RANGE; j++) { data[j + -32] = (char)(data[j] * (char)-11); @@ -6522,14 +7256,19 @@ public static void runCharM32() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 64 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 64"}, + applyIfCPUFeature = {"sve", "true"}) public static void testCharP32(char[] data) { for (int j = 0; j < RANGE - 32; j++) { data[j + 32] = (char)(data[j] * (char)-11); @@ -6570,14 +7309,22 @@ public static void runCharP32() { @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharM63(char[] data) { for (int j = 63; j < RANGE; j++) { data[j + -63] = (char)(data[j] * (char)-11); @@ -6606,10 +7353,15 @@ public static void runCharM63() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 126 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 126"}, + applyIfCPUFeature = {"sve", "true"}) public static void testCharP63(char[] data) { for (int j = 0; j < RANGE - 63; j++) { data[j + 63] = (char)(data[j] * (char)-11); @@ -6638,10 +7390,14 @@ public static void runCharP63() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharM64(char[] data) { for (int j = 64; j < RANGE; j++) { data[j + -64] = (char)(data[j] * (char)-11); @@ -6682,14 +7438,19 @@ public static void runCharM64() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 128 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 128"}, + applyIfCPUFeature = {"sve", "true"}) public static void testCharP64(char[] data) { for (int j = 0; j < RANGE - 64; j++) { data[j + 64] = (char)(data[j] * (char)-11); @@ -6730,14 +7491,22 @@ public static void runCharP64() { @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"sve", "true"}) public static void testCharM65(char[] data) { for (int j = 65; j < RANGE; j++) { data[j + -65] = (char)(data[j] * (char)-11); @@ -6766,10 +7535,15 @@ public static void runCharM65() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + // positive byte_offset 130 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 130"}, + applyIfCPUFeature = {"sve", "true"}) public static void testCharP65(char[] data) { for (int j = 0; j < RANGE - 65; j++) { data[j + 65] = (char)(data[j] * (char)-11); @@ -6798,10 +7572,14 @@ public static void runCharP65() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharM128(char[] data) { for (int j = 128; j < RANGE; j++) { data[j + -128] = (char)(data[j] * (char)-11); @@ -6842,14 +7620,22 @@ public static void runCharM128() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharP128(char[] data) { for (int j = 0; j < RANGE - 128; j++) { data[j + 128] = (char)(data[j] * (char)-11); @@ -6890,14 +7676,22 @@ public static void runCharP128() { @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_C, IRNode.MUL_VS, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharM129(char[] data) { for (int j = 129; j < RANGE; j++) { data[j + -129] = (char)(data[j] * (char)-11); @@ -6926,10 +7720,14 @@ public static void runCharM129() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) public static void testCharP129(char[] data) { for (int j = 0; j < RANGE - 129; j++) { data[j + 129] = (char)(data[j] * (char)-11); @@ -6958,10 +7756,14 @@ public static void runCharP129() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testCharM192(char[] data) { for (int j = 192; j < RANGE; j++) { data[j + -192] = (char)(data[j] * (char)-11); @@ -7002,14 +7804,18 @@ public static void runCharM192() { @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 16 + // CPU: asimd -> vector_width: 16 -> elements in vector: 8 @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 128 + @IR(counts = {IRNode.LOAD_VECTOR_C, "> 0", IRNode.MUL_VS, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) public static void testCharP192(char[] data) { for (int j = 0; j < RANGE - 192; j++) { data[j + 192] = (char)(data[j] * (char)-11); @@ -7038,10 +7844,14 @@ public static void runCharP192() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteP0(byte[] data) { for (int j = 0; j < RANGE; j++) { data[j + 0] = (byte)(data[j] * (byte)11); @@ -7082,14 +7892,22 @@ public static void runByteP0() { @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"sve", "true"}) public static void testByteM1(byte[] data) { for (int j = 1; j < RANGE; j++) { data[j + -1] = (byte)(data[j] * (byte)11); @@ -7115,7 +7933,10 @@ public static void runByteM1() { // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 // positive byte_offset 1 can lead to cyclic dependency // No positive IR rule: conditions impossible. - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + // positive byte_offset 1 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 // positive byte_offset 1 can lead to cyclic dependency // No positive IR rule: conditions impossible. public static void testByteP1(byte[] data) { @@ -7158,14 +7979,22 @@ public static void runByteP1() { @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"sve", "true"}) public static void testByteM2(byte[] data) { for (int j = 2; j < RANGE; j++) { data[j + -2] = (byte)(data[j] * (byte)11); @@ -7191,7 +8020,10 @@ public static void runByteM2() { // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 // positive byte_offset 2 can lead to cyclic dependency // No positive IR rule: conditions impossible. - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + // positive byte_offset 2 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 // positive byte_offset 2 can lead to cyclic dependency // No positive IR rule: conditions impossible. public static void testByteP2(byte[] data) { @@ -7234,14 +8066,22 @@ public static void runByteP2() { @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteM3(byte[] data) { for (int j = 3; j < RANGE; j++) { data[j + -3] = (byte)(data[j] * (byte)11); @@ -7267,7 +8107,10 @@ public static void runByteM3() { // CPU: avx512bw -> vector_width: 64 -> elements in vector: 64 // positive byte_offset 3 can lead to cyclic dependency // No positive IR rule: conditions impossible. - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + // positive byte_offset 3 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 // positive byte_offset 3 can lead to cyclic dependency // No positive IR rule: conditions impossible. public static void testByteP3(byte[] data) { @@ -7298,10 +8141,14 @@ public static void runByteP3() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteM4(byte[] data) { for (int j = 4; j < RANGE; j++) { data[j + -4] = (byte)(data[j] * (byte)11); @@ -7333,11 +8180,16 @@ public static void runByteM4() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 // positive byte_offset 4 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 + // positive byte_offset 4 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 4"}, + applyIfCPUFeature = {"sve", "true"}) public static void testByteP4(byte[] data) { for (int j = 0; j < RANGE - 4; j++) { data[j + 4] = (byte)(data[j] * (byte)11); @@ -7378,14 +8230,22 @@ public static void runByteP4() { @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteM7(byte[] data) { for (int j = 7; j < RANGE; j++) { data[j + -7] = (byte)(data[j] * (byte)11); @@ -7417,11 +8277,16 @@ public static void runByteM7() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 7"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 // positive byte_offset 7 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 7"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 + // positive byte_offset 7 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 7"}, + applyIfCPUFeature = {"sve", "true"}) public static void testByteP7(byte[] data) { for (int j = 0; j < RANGE - 7; j++) { data[j + 7] = (byte)(data[j] * (byte)11); @@ -7450,10 +8315,14 @@ public static void runByteP7() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteM8(byte[] data) { for (int j = 8; j < RANGE; j++) { data[j + -8] = (byte)(data[j] * (byte)11); @@ -7485,11 +8354,16 @@ public static void runByteM8() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 8"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + // positive byte_offset 8 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 // positive byte_offset 8 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteP8(byte[] data) { for (int j = 0; j < RANGE - 8; j++) { data[j + 8] = (byte)(data[j] * (byte)11); @@ -7530,14 +8404,22 @@ public static void runByteP8() { @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"sve", "true"}) public static void testByteM14(byte[] data) { for (int j = 14; j < RANGE; j++) { data[j + -14] = (byte)(data[j] * (byte)11); @@ -7569,11 +8451,16 @@ public static void runByteM14() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 14"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + // positive byte_offset 14 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 14"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 // positive byte_offset 14 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 14"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteP14(byte[] data) { for (int j = 0; j < RANGE - 14; j++) { data[j + 14] = (byte)(data[j] * (byte)11); @@ -7602,10 +8489,14 @@ public static void runByteP14() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) public static void testByteM16(byte[] data) { for (int j = 16; j < RANGE; j++) { data[j + -16] = (byte)(data[j] * (byte)11); @@ -7640,11 +8531,19 @@ public static void runByteM16() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 16"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 // positive byte_offset 16 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteP16(byte[] data) { for (int j = 0; j < RANGE - 16; j++) { data[j + 16] = (byte)(data[j] * (byte)11); @@ -7685,14 +8584,22 @@ public static void runByteP16() { @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"sve", "true"}) public static void testByteM18(byte[] data) { for (int j = 18; j < RANGE; j++) { data[j + -18] = (byte)(data[j] * (byte)11); @@ -7723,11 +8630,15 @@ public static void runByteM18() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 18"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 // positive byte_offset 18 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 18"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteP18(byte[] data) { for (int j = 0; j < RANGE - 18; j++) { data[j + 18] = (byte)(data[j] * (byte)11); @@ -7756,10 +8667,14 @@ public static void runByteP18() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteM20(byte[] data) { for (int j = 20; j < RANGE; j++) { data[j + -20] = (byte)(data[j] * (byte)11); @@ -7790,11 +8705,15 @@ public static void runByteM20() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 20"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 // positive byte_offset 20 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 20"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteP20(byte[] data) { for (int j = 0; j < RANGE - 20; j++) { data[j + 20] = (byte)(data[j] * (byte)11); @@ -7835,14 +8754,22 @@ public static void runByteP20() { @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteM31(byte[] data) { for (int j = 31; j < RANGE; j++) { data[j + -31] = (byte)(data[j] * (byte)11); @@ -7873,11 +8800,15 @@ public static void runByteM31() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 31"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 // positive byte_offset 31 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 31"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteP31(byte[] data) { for (int j = 0; j < RANGE - 31; j++) { data[j + 31] = (byte)(data[j] * (byte)11); @@ -7906,10 +8837,14 @@ public static void runByteP31() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteM32(byte[] data) { for (int j = 32; j < RANGE; j++) { data[j + -32] = (byte)(data[j] * (byte)11); @@ -7947,14 +8882,19 @@ public static void runByteM32() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 32"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 + // positive byte_offset 32 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 32"}, + applyIfCPUFeature = {"sve", "true"}) public static void testByteP32(byte[] data) { for (int j = 0; j < RANGE - 32; j++) { data[j + 32] = (byte)(data[j] * (byte)11); @@ -7995,14 +8935,22 @@ public static void runByteP32() { @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteM63(byte[] data) { for (int j = 63; j < RANGE; j++) { data[j + -63] = (byte)(data[j] * (byte)11); @@ -8032,10 +8980,15 @@ public static void runByteM63() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 63"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 + // positive byte_offset 63 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 63"}, + applyIfCPUFeature = {"sve", "true"}) public static void testByteP63(byte[] data) { for (int j = 0; j < RANGE - 63; j++) { data[j + 63] = (byte)(data[j] * (byte)11); @@ -8064,10 +9017,14 @@ public static void runByteP63() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteM64(byte[] data) { for (int j = 64; j < RANGE; j++) { data[j + -64] = (byte)(data[j] * (byte)11); @@ -8108,14 +9065,19 @@ public static void runByteM64() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 + // positive byte_offset 64 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 64"}, + applyIfCPUFeature = {"sve", "true"}) public static void testByteP64(byte[] data) { for (int j = 0; j < RANGE - 64; j++) { data[j + 64] = (byte)(data[j] * (byte)11); @@ -8156,14 +9118,22 @@ public static void runByteP64() { @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteM65(byte[] data) { for (int j = 65; j < RANGE; j++) { data[j + -65] = (byte)(data[j] * (byte)11); @@ -8192,10 +9162,15 @@ public static void runByteM65() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 + // positive byte_offset 65 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 65"}, + applyIfCPUFeature = {"sve", "true"}) public static void testByteP65(byte[] data) { for (int j = 0; j < RANGE - 65; j++) { data[j + 65] = (byte)(data[j] * (byte)11); @@ -8224,10 +9199,14 @@ public static void runByteP65() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeature = {"sve", "true"}) public static void testByteM128(byte[] data) { for (int j = 128; j < RANGE; j++) { data[j + -128] = (byte)(data[j] * (byte)11); @@ -8268,14 +9247,19 @@ public static void runByteM128() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 + // positive byte_offset 128 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 128"}, + applyIfCPUFeature = {"sve", "true"}) public static void testByteP128(byte[] data) { for (int j = 0; j < RANGE - 128; j++) { data[j + 128] = (byte)(data[j] * (byte)11); @@ -8316,14 +9300,22 @@ public static void runByteP128() { @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_B, IRNode.MUL_VB, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteM129(byte[] data) { for (int j = 129; j < RANGE; j++) { data[j + -129] = (byte)(data[j] * (byte)11); @@ -8352,10 +9344,15 @@ public static void runByteM129() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 + // positive byte_offset 129 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 129"}, + applyIfCPUFeature = {"sve", "true"}) public static void testByteP129(byte[] data) { for (int j = 0; j < RANGE - 129; j++) { data[j + 129] = (byte)(data[j] * (byte)11); @@ -8384,10 +9381,14 @@ public static void runByteP129() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testByteM192(byte[] data) { for (int j = 192; j < RANGE; j++) { data[j + -192] = (byte)(data[j] * (byte)11); @@ -8428,14 +9429,19 @@ public static void runByteM192() { @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, applyIfCPUFeature = {"avx512bw", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 32 + // CPU: asimd -> vector_width: 16 -> elements in vector: 16 @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 4"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 256 + // positive byte_offset 192 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_B, "> 0", IRNode.MUL_VB, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 4", "MaxVectorSize", "<= 192"}, + applyIfCPUFeature = {"sve", "true"}) public static void testByteP192(byte[] data) { for (int j = 0; j < RANGE - 192; j++) { data[j + 192] = (byte)(data[j] * (byte)11); @@ -8464,10 +9470,14 @@ public static void runByteP192() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP0(float[] data) { for (int j = 0; j < RANGE; j++) { data[j + 0] = (float)(data[j] * (float)1.001f); @@ -8508,14 +9518,22 @@ public static void runFloatP0() { @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM1(float[] data) { for (int j = 1; j < RANGE; j++) { data[j + -1] = (float)(data[j] * (float)1.001f); @@ -8541,7 +9559,10 @@ public static void runFloatM1() { // CPU: avx512 -> vector_width: 64 -> elements in vector: 16 // positive byte_offset 4 can lead to cyclic dependency // No positive IR rule: conditions impossible. - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + // positive byte_offset 4 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 // positive byte_offset 4 can lead to cyclic dependency // No positive IR rule: conditions impossible. public static void testFloatP1(float[] data) { @@ -8572,10 +9593,14 @@ public static void runFloatP1() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM2(float[] data) { for (int j = 2; j < RANGE; j++) { data[j + -2] = (float)(data[j] * (float)1.001f); @@ -8607,11 +9632,16 @@ public static void runFloatM2() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + // positive byte_offset 8 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 // positive byte_offset 8 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP2(float[] data) { for (int j = 0; j < RANGE - 2; j++) { data[j + 2] = (float)(data[j] * (float)1.001f); @@ -8652,14 +9682,22 @@ public static void runFloatP2() { @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM3(float[] data) { for (int j = 3; j < RANGE; j++) { data[j + -3] = (float)(data[j] * (float)1.001f); @@ -8691,11 +9729,16 @@ public static void runFloatM3() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 12"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + // positive byte_offset 12 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 12"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 // positive byte_offset 12 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 12"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP3(float[] data) { for (int j = 0; j < RANGE - 3; j++) { data[j + 3] = (float)(data[j] * (float)1.001f); @@ -8724,10 +9767,14 @@ public static void runFloatP3() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM4(float[] data) { for (int j = 4; j < RANGE; j++) { data[j + -4] = (float)(data[j] * (float)1.001f); @@ -8762,11 +9809,19 @@ public static void runFloatM4() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 // positive byte_offset 16 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP4(float[] data) { for (int j = 0; j < RANGE - 4; j++) { data[j + 4] = (float)(data[j] * (float)1.001f); @@ -8807,14 +9862,22 @@ public static void runFloatP4() { @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM7(float[] data) { for (int j = 7; j < RANGE; j++) { data[j + -7] = (float)(data[j] * (float)1.001f); @@ -8845,11 +9908,15 @@ public static void runFloatM7() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 28"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 // positive byte_offset 28 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 28"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP7(float[] data) { for (int j = 0; j < RANGE - 7; j++) { data[j + 7] = (float)(data[j] * (float)1.001f); @@ -8878,10 +9945,14 @@ public static void runFloatP7() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM8(float[] data) { for (int j = 8; j < RANGE; j++) { data[j + -8] = (float)(data[j] * (float)1.001f); @@ -8919,14 +9990,19 @@ public static void runFloatM8() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 32"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + // positive byte_offset 32 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 32"}, + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP8(float[] data) { for (int j = 0; j < RANGE - 8; j++) { data[j + 8] = (float)(data[j] * (float)1.001f); @@ -8955,10 +10031,14 @@ public static void runFloatP8() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM14(float[] data) { for (int j = 14; j < RANGE; j++) { data[j + -14] = (float)(data[j] * (float)1.001f); @@ -8988,10 +10068,15 @@ public static void runFloatM14() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 56"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + // positive byte_offset 56 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 56"}, + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP14(float[] data) { for (int j = 0; j < RANGE - 14; j++) { data[j + 14] = (float)(data[j] * (float)1.001f); @@ -9020,10 +10105,14 @@ public static void runFloatP14() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM16(float[] data) { for (int j = 16; j < RANGE; j++) { data[j + -16] = (float)(data[j] * (float)1.001f); @@ -9064,14 +10153,19 @@ public static void runFloatM16() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + // positive byte_offset 64 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 64"}, + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP16(float[] data) { for (int j = 0; j < RANGE - 16; j++) { data[j + 16] = (float)(data[j] * (float)1.001f); @@ -9100,10 +10194,14 @@ public static void runFloatP16() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM18(float[] data) { for (int j = 18; j < RANGE; j++) { data[j + -18] = (float)(data[j] * (float)1.001f); @@ -9132,10 +10230,15 @@ public static void runFloatM18() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + // positive byte_offset 72 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 72"}, + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP18(float[] data) { for (int j = 0; j < RANGE - 18; j++) { data[j + 18] = (float)(data[j] * (float)1.001f); @@ -9164,10 +10267,14 @@ public static void runFloatP18() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM20(float[] data) { for (int j = 20; j < RANGE; j++) { data[j + -20] = (float)(data[j] * (float)1.001f); @@ -9200,10 +10307,19 @@ public static void runFloatM20() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + // positive byte_offset 80 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 80"}, + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP20(float[] data) { for (int j = 0; j < RANGE - 20; j++) { data[j + 20] = (float)(data[j] * (float)1.001f); @@ -9244,14 +10360,22 @@ public static void runFloatP20() { @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM31(float[] data) { for (int j = 31; j < RANGE; j++) { data[j + -31] = (float)(data[j] * (float)1.001f); @@ -9280,10 +10404,15 @@ public static void runFloatM31() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + // positive byte_offset 124 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 124"}, + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP31(float[] data) { for (int j = 0; j < RANGE - 31; j++) { data[j + 31] = (float)(data[j] * (float)1.001f); @@ -9312,10 +10441,14 @@ public static void runFloatP31() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM32(float[] data) { for (int j = 32; j < RANGE; j++) { data[j + -32] = (float)(data[j] * (float)1.001f); @@ -9356,14 +10489,19 @@ public static void runFloatM32() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + // positive byte_offset 128 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 128"}, + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP32(float[] data) { for (int j = 0; j < RANGE - 32; j++) { data[j + 32] = (float)(data[j] * (float)1.001f); @@ -9404,14 +10542,22 @@ public static void runFloatP32() { @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM63(float[] data) { for (int j = 63; j < RANGE; j++) { data[j + -63] = (float)(data[j] * (float)1.001f); @@ -9440,10 +10586,15 @@ public static void runFloatM63() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 + // positive byte_offset 252 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8", "MaxVectorSize", "<= 252"}, + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP63(float[] data) { for (int j = 0; j < RANGE - 63; j++) { data[j + 63] = (float)(data[j] * (float)1.001f); @@ -9472,10 +10623,14 @@ public static void runFloatP63() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM64(float[] data) { for (int j = 64; j < RANGE; j++) { data[j + -64] = (float)(data[j] * (float)1.001f); @@ -9516,14 +10671,22 @@ public static void runFloatM64() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP64(float[] data) { for (int j = 0; j < RANGE - 64; j++) { data[j + 64] = (float)(data[j] * (float)1.001f); @@ -9564,14 +10727,22 @@ public static void runFloatP64() { @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM65(float[] data) { for (int j = 65; j < RANGE; j++) { data[j + -65] = (float)(data[j] * (float)1.001f); @@ -9600,10 +10771,14 @@ public static void runFloatM65() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP65(float[] data) { for (int j = 0; j < RANGE - 65; j++) { data[j + 65] = (float)(data[j] * (float)1.001f); @@ -9632,10 +10807,14 @@ public static void runFloatP65() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM128(float[] data) { for (int j = 128; j < RANGE; j++) { data[j + -128] = (float)(data[j] * (float)1.001f); @@ -9676,14 +10855,22 @@ public static void runFloatM128() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP128(float[] data) { for (int j = 0; j < RANGE - 128; j++) { data[j + 128] = (float)(data[j] * (float)1.001f); @@ -9724,14 +10911,22 @@ public static void runFloatP128() { @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_F, IRNode.MUL_VF, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM129(float[] data) { for (int j = 129; j < RANGE; j++) { data[j + -129] = (float)(data[j] * (float)1.001f); @@ -9760,10 +10955,14 @@ public static void runFloatM129() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP129(float[] data) { for (int j = 0; j < RANGE - 129; j++) { data[j + 129] = (float)(data[j] * (float)1.001f); @@ -9792,10 +10991,14 @@ public static void runFloatP129() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatM192(float[] data) { for (int j = 192; j < RANGE; j++) { data[j + -192] = (float)(data[j] * (float)1.001f); @@ -9836,14 +11039,22 @@ public static void runFloatM192() { @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 8 + // CPU: asimd -> vector_width: 16 -> elements in vector: 4 + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 64 @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_F, "> 0", IRNode.MUL_VF, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 8"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testFloatP192(float[] data) { for (int j = 0; j < RANGE - 192; j++) { data[j + 192] = (float)(data[j] * (float)1.001f); @@ -9872,10 +11083,14 @@ public static void runFloatP192() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP0(double[] data) { for (int j = 0; j < RANGE; j++) { data[j + 0] = (double)(data[j] * (double)1.001); @@ -9916,14 +11131,22 @@ public static void runDoubleP0() { @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM1(double[] data) { for (int j = 1; j < RANGE; j++) { data[j + -1] = (double)(data[j] * (double)1.001); @@ -9949,7 +11172,10 @@ public static void runDoubleM1() { // CPU: avx512 -> vector_width: 64 -> elements in vector: 8 // positive byte_offset 8 can lead to cyclic dependency // No positive IR rule: conditions impossible. - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + // positive byte_offset 8 can lead to cyclic dependency + // No positive IR rule: conditions impossible. + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 // positive byte_offset 8 can lead to cyclic dependency // No positive IR rule: conditions impossible. public static void testDoubleP1(double[] data) { @@ -9980,10 +11206,14 @@ public static void runDoubleP1() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM2(double[] data) { for (int j = 2; j < RANGE; j++) { data[j + -2] = (double)(data[j] * (double)1.001); @@ -10018,11 +11248,19 @@ public static void runDoubleM2() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 // positive byte_offset 16 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP2(double[] data) { for (int j = 0; j < RANGE - 2; j++) { data[j + 2] = (double)(data[j] * (double)1.001); @@ -10063,14 +11301,22 @@ public static void runDoubleP2() { @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"sve", "true"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM3(double[] data) { for (int j = 3; j < RANGE; j++) { data[j + -3] = (double)(data[j] * (double)1.001); @@ -10101,11 +11347,15 @@ public static void runDoubleM3() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 24"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 // positive byte_offset 24 can lead to cyclic dependency @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 24"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP3(double[] data) { for (int j = 0; j < RANGE - 3; j++) { data[j + 3] = (double)(data[j] * (double)1.001); @@ -10134,10 +11384,14 @@ public static void runDoubleP3() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM4(double[] data) { for (int j = 4; j < RANGE; j++) { data[j + -4] = (double)(data[j] * (double)1.001); @@ -10175,14 +11429,19 @@ public static void runDoubleM4() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 32"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + // positive byte_offset 32 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 32"}, + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP4(double[] data) { for (int j = 0; j < RANGE - 4; j++) { data[j + 4] = (double)(data[j] * (double)1.001); @@ -10223,14 +11482,22 @@ public static void runDoubleP4() { @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM7(double[] data) { for (int j = 7; j < RANGE; j++) { data[j + -7] = (double)(data[j] * (double)1.001); @@ -10260,10 +11527,15 @@ public static void runDoubleM7() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 56"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + // positive byte_offset 56 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 56"}, + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP7(double[] data) { for (int j = 0; j < RANGE - 7; j++) { data[j + 7] = (double)(data[j] * (double)1.001); @@ -10292,10 +11564,14 @@ public static void runDoubleP7() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM8(double[] data) { for (int j = 8; j < RANGE; j++) { data[j + -8] = (double)(data[j] * (double)1.001); @@ -10336,14 +11612,19 @@ public static void runDoubleM8() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + // positive byte_offset 64 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 64"}, + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP8(double[] data) { for (int j = 0; j < RANGE - 8; j++) { data[j + 8] = (double)(data[j] * (double)1.001); @@ -10372,10 +11653,14 @@ public static void runDoubleP8() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM14(double[] data) { for (int j = 14; j < RANGE; j++) { data[j + -14] = (double)(data[j] * (double)1.001); @@ -10408,10 +11693,19 @@ public static void runDoubleM14() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + // positive byte_offset 112 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 112"}, + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP14(double[] data) { for (int j = 0; j < RANGE - 14; j++) { data[j + 14] = (double)(data[j] * (double)1.001); @@ -10440,10 +11734,14 @@ public static void runDoubleP14() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM16(double[] data) { for (int j = 16; j < RANGE; j++) { data[j + -16] = (double)(data[j] * (double)1.001); @@ -10484,14 +11782,19 @@ public static void runDoubleM16() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + // positive byte_offset 128 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 128"}, + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP16(double[] data) { for (int j = 0; j < RANGE - 16; j++) { data[j + 16] = (double)(data[j] * (double)1.001); @@ -10520,10 +11823,14 @@ public static void runDoubleP16() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM18(double[] data) { for (int j = 18; j < RANGE; j++) { data[j + -18] = (double)(data[j] * (double)1.001); @@ -10556,10 +11863,19 @@ public static void runDoubleM18() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + // positive byte_offset 144 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 144"}, + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP18(double[] data) { for (int j = 0; j < RANGE - 18; j++) { data[j + 18] = (double)(data[j] * (double)1.001); @@ -10588,10 +11904,14 @@ public static void runDoubleP18() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM20(double[] data) { for (int j = 20; j < RANGE; j++) { data[j + -20] = (double)(data[j] * (double)1.001); @@ -10628,14 +11948,19 @@ public static void runDoubleM20() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + // positive byte_offset 160 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 160"}, + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP20(double[] data) { for (int j = 0; j < RANGE - 20; j++) { data[j + 20] = (double)(data[j] * (double)1.001); @@ -10676,14 +12001,22 @@ public static void runDoubleP20() { @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM31(double[] data) { for (int j = 31; j < RANGE; j++) { data[j + -31] = (double)(data[j] * (double)1.001); @@ -10712,10 +12045,15 @@ public static void runDoubleM31() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 + // positive byte_offset 248 can lead to cyclic dependency + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16", "MaxVectorSize", "<= 248"}, + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP31(double[] data) { for (int j = 0; j < RANGE - 31; j++) { data[j + 31] = (double)(data[j] * (double)1.001); @@ -10744,10 +12082,14 @@ public static void runDoubleP31() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM32(double[] data) { for (int j = 32; j < RANGE; j++) { data[j + -32] = (double)(data[j] * (double)1.001); @@ -10788,14 +12130,22 @@ public static void runDoubleM32() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP32(double[] data) { for (int j = 0; j < RANGE - 32; j++) { data[j + 32] = (double)(data[j] * (double)1.001); @@ -10836,14 +12186,22 @@ public static void runDoubleP32() { @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM63(double[] data) { for (int j = 63; j < RANGE; j++) { data[j + -63] = (double)(data[j] * (double)1.001); @@ -10872,10 +12230,14 @@ public static void runDoubleM63() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP63(double[] data) { for (int j = 0; j < RANGE - 63; j++) { data[j + 63] = (double)(data[j] * (double)1.001); @@ -10904,10 +12266,14 @@ public static void runDoubleP63() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM64(double[] data) { for (int j = 64; j < RANGE; j++) { data[j + -64] = (double)(data[j] * (double)1.001); @@ -10948,14 +12314,22 @@ public static void runDoubleM64() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP64(double[] data) { for (int j = 0; j < RANGE - 64; j++) { data[j + 64] = (double)(data[j] * (double)1.001); @@ -10996,14 +12370,22 @@ public static void runDoubleP64() { @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM65(double[] data) { for (int j = 65; j < RANGE; j++) { data[j + -65] = (double)(data[j] * (double)1.001); @@ -11032,10 +12414,14 @@ public static void runDoubleM65() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP65(double[] data) { for (int j = 0; j < RANGE - 65; j++) { data[j + 65] = (double)(data[j] * (double)1.001); @@ -11064,10 +12450,14 @@ public static void runDoubleP65() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM128(double[] data) { for (int j = 128; j < RANGE; j++) { data[j + -128] = (double)(data[j] * (double)1.001); @@ -11108,14 +12498,22 @@ public static void runDoubleM128() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP128(double[] data) { for (int j = 0; j < RANGE - 128; j++) { data[j + 128] = (double)(data[j] * (double)1.001); @@ -11156,14 +12554,22 @@ public static void runDoubleP128() { @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Strict alignment not possible. + @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, + applyIf = {"AlignVector", "true"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Strict alignment not possible. @IR(failOn = {IRNode.LOAD_VECTOR_D, IRNode.MUL_VD, IRNode.STORE_VECTOR}, applyIf = {"AlignVector", "true"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM129(double[] data) { for (int j = 129; j < RANGE; j++) { data[j + -129] = (double)(data[j] * (double)1.001); @@ -11192,10 +12598,14 @@ public static void runDoubleM129() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP129(double[] data) { for (int j = 0; j < RANGE - 129; j++) { data[j + 129] = (double)(data[j] * (double)1.001); @@ -11224,10 +12634,14 @@ public static void runDoubleP129() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleM192(double[] data) { for (int j = 192; j < RANGE; j++) { data[j + -192] = (double)(data[j] * (double)1.001); @@ -11268,14 +12682,22 @@ public static void runDoubleM192() { @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, applyIfCPUFeature = {"avx512", "true"}) - // CPU: asimd -> vector_width: 32 -> elements in vector: 4 + // CPU: asimd -> vector_width: 16 -> elements in vector: 2 + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // Vectorize when strict alignment guaranteed. + @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, + applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, + applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"}) + // CPU: sve -> max vector_width: 256 -> max elements in vector: 32 @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "false", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) // Vectorize when strict alignment guaranteed. @IR(counts = {IRNode.LOAD_VECTOR_D, "> 0", IRNode.MUL_VD, "> 0", IRNode.STORE_VECTOR, "> 0"}, applyIfAnd = {"AlignVector", "true", "MaxVectorSize", ">= 16"}, - applyIfCPUFeature = {"asimd", "true"}) + applyIfCPUFeature = {"sve", "true"}) public static void testDoubleP192(double[] data) { for (int j = 0; j < RANGE - 192; j++) { data[j + 192] = (double)(data[j] * (double)1.001);