diff --git a/code/examples.cpp b/code/examples.cpp index 0cba4725..3cca5ab1 100644 --- a/code/examples.cpp +++ b/code/examples.cpp @@ -1,5 +1,6 @@ #include "common.h" +// some intrinsics are defined as macros, and they don't like extra commas #define COMMA , void test() { @@ -281,4 +282,76 @@ void test() { 0xabababab12121212, 0x1234567812345678})); PRINT(__lasx_xvmskltz_d(__m256i{0x0000111800000000, 0x0081000081111111, 0x8111000008010101, 0x0000000000000000})); + + // Shuffling + PRINT(__lsx_vshuf_b(__m128i{0x1122334455667788, 0x99aabbccddeeff00}, + __m128i{0xabcdef1314156678, 0x1234123443214321}, + __m128i{0x0011021304050607, 0x0811120213031404})); + PRINT(__lasx_xvshuf_b(__m256i{0x1122334455667788, 0x99aabbccddeeff00, + 0xabcdef1212341234, 0xaabbaabbddeeddee}, + __m256i{0xabcdef1314156678, 0x1234123443214321, + 0x1234123443214321, 0x5678567856785678}, + __m256i{0x1f1f00001a0a1b0b, 0x1111120213031404, + 0x0102030405060708, 0x1112131405060708})); + + PRINT(__lsx_vshuf_h(__m128i{0x0001000200030004, 0x0005000a000b000c}, + __m128i{0x1122334455667788, 0x99aabbccddeeff00}, + __m128i{0xabcdef1314156678, 0x1234123443214321})); + PRINT(__lasx_xvshuf_h(__m256i{0x0001000200030004, 0x0005000a000b000c, + 0x000f000e00010002, 0x0008000900020001}, + __m256i{0x1122334455667788, 0x99aabbccddeeff00, + 0xabcdef1212341234, 0xaabbaabbddeeddee}, + __m256i{0xabcdef1314156678, 0x1234123443214321, + 0x1234123443214321, 0x5678567856785678})); + + PRINT(__lsx_vshuf_w(__m128i{0x0000000200000004, 0x0000000700000005}, + __m128i{0x1122334455667788, 0x99aabbccddeeff00}, + __m128i{0xabcdef1314156678, 0x1234123443214321})); + PRINT(__lasx_xvshuf_w(__m256i{0x0000000200000004, 0x0000000700000005, + 0x0000000100000003, 0x0000000400000000}, + __m256i{0x1122334455667788, 0x99aabbccddeeff00, + 0xabcdef1212341234, 0xaabbaabbddeeddee}, + __m256i{0xabcdef1314156678, 0x1234123443214321, + 0x1234123443214321, 0x5678567856785678})); + + PRINT(__lsx_vshuf_d(__m128i{0x0000000000000001, 0x0000000000000002}, + 
__m128i{0x1122334455667788, 0x99aabbccddeeff00}, + __m128i{0xabcdef1314156678, 0x1234123443214321})); + PRINT(__lasx_xvshuf_d(__m256i{0x0000000000000000, 0x0000000000000003, + 0x0000000000000002, 0x0000000000000001}, + __m256i{0x1122334455667788, 0x99aabbccddeeff00, + 0xabcdef1212341234, 0xaabbaabbddeeddee}, + __m256i{0xabcdef1314156678, 0x1234123443214321, + 0x1234123443214321, 0x5678567856785678})); + + PRINT(__lsx_vshuf4i_b(__m128i{0xabcdef1314156678 COMMA 0x1234123443214321}, + 0x12)); + PRINT(__lasx_xvshuf4i_b( + __m256i{ + 0xabcdef1314156678 COMMA 0x1234123443214321 COMMA 0x1234123443214321 COMMA 0x5678567856785678}, + 0x12)); + + PRINT(__lsx_vshuf4i_h(__m128i{0xabcdef1314156678 COMMA 0x1234123443214321}, + 0x12)); + PRINT(__lasx_xvshuf4i_h( + __m256i{ + 0xabcdef1314156678 COMMA 0x1234123443214321 COMMA 0x1234123443214321 COMMA 0x5678567856785678}, + 0x12)); + + PRINT(__lsx_vshuf4i_w(__m128i{0xabcdef1314156678 COMMA 0x1234123443214321}, + 0x12)); + PRINT(__lasx_xvshuf4i_w( + __m256i{ + 0xabcdef1314156678 COMMA 0x1234123443214321 COMMA 0x1234123443214321 COMMA 0x5678567856785678}, + 0x12)); + + PRINT(__lsx_vshuf4i_d(__m128i{0x1122334455667788 COMMA 0x99aabbccddeeff00}, + __m128i{0xabcdef1314156678 COMMA 0x1234123443214321}, + 0x12)); + PRINT(__lasx_xvshuf4i_d( + __m256i{ + 0x1122334455667788 COMMA 0x99aabbccddeeff00 COMMA 0xabcdef1212341234 COMMA 0xaabbaabbddeeddee}, + __m256i{ + 0xabcdef1314156678 COMMA 0x1234123443214321 COMMA 0x1234123443214321 COMMA 0x5678567856785678}, + 0x12)); } \ No newline at end of file diff --git a/code/examples.md b/code/examples.md index a3bd4312..d9a2f0ba 100644 --- a/code/examples.md +++ b/code/examples.md @@ -99,3 +99,19 @@ __m128i __lsx_vmskltz_d(__m128i{0x1122334455667788, 0x99aabbccddeeff00}): 0x0000 __m128i __lsx_vmskltz_d(__m128i{0x0000808000000000, 0x0081000081716151}): 0x0000000000000000 0x0000000000000000 __m256i __lasx_xvmskltz_d(__m256i{0x1122334455667788, 0x99aabbccddeeff00, 0xabababab12121212, 0x1234567812345678}): 
0x0000000000000002 0x0000000000000000 0x0000000000000001 0x0000000000000000 __m256i __lasx_xvmskltz_d(__m256i{0x0000111800000000, 0x0081000081111111, 0x8111000008010101, 0x0000000000000000}): 0x0000000000000000 0x0000000000000000 0x0000000000000001 0x0000000000000000 +__m128i __lsx_vshuf_b(__m128i{0x1122334455667788, 0x99aabbccddeeff00}, __m128i{0xabcdef1314156678, 0x1234123443214321}, __m128i{0x0011021304050607, 0x0811120213031404}): 0x7877155513efcdab 0x2177661555144413 +__m256i __lasx_xvshuf_b(__m256i{0x1122334455667788, 0x99aabbccddeeff00, 0xabcdef1212341234, 0xaabbaabbddeeddee}, __m256i{0xabcdef1314156678, 0x1234123443214321, 0x1234123443214321, 0x5678567856785678}, __m256i{0x1f1f00001a0a1b0b, 0x1111120213031404, 0x0102030405060708, 0x1112131405060708}): 0x99997878ee21dd43 0x7777661555144413 0x4321433412341278 0x1234121212341278 +__m128i __lsx_vshuf_h(__m128i{0x0001000200030004, 0x0005000a000b000c}, __m128i{0x1122334455667788, 0x99aabbccddeeff00}, __m128i{0xabcdef1314156678, 0x1234123443214321}): 0x1415ef13abcd4321 0x432133441122ff00 +__m256i __lasx_xvshuf_h(__m256i{0x0001000200030004, 0x0005000a000b000c, 0x000f000e00010002, 0x0008000900020001}, __m256i{0x1122334455667788, 0x99aabbccddeeff00, 0xabcdef1212341234, 0xaabbaabbddeeddee}, __m256i{0xabcdef1314156678, 0x1234123443214321, 0x1234123443214321, 0x5678567856785678}): 0x1415ef13abcd4321 0x432133441122ff00 0xaabbaabb43211234 0x1234123412344321 +__m128i __lsx_vshuf_w(__m128i{0x0000000200000004, 0x0000000700000005}, __m128i{0x1122334455667788, 0x99aabbccddeeff00}, __m128i{0xabcdef1314156678, 0x1234123443214321}): 0x4321432155667788 0x99aabbcc11223344 +__m256i __lasx_xvshuf_w(__m256i{0x0000000200000004, 0x0000000700000005, 0x0000000100000003, 0x0000000400000000}, __m256i{0x1122334455667788, 0x99aabbccddeeff00, 0xabcdef1212341234, 0xaabbaabbddeeddee}, __m256i{0xabcdef1314156678, 0x1234123443214321, 0x1234123443214321, 0x5678567856785678}): 0x4321432155667788 0x99aabbcc11223344 0x1234123456785678 
0x1234123443214321 +__m128i __lsx_vshuf_d(__m128i{0x0000000000000001, 0x0000000000000002}, __m128i{0x1122334455667788, 0x99aabbccddeeff00}, __m128i{0xabcdef1314156678, 0x1234123443214321}): 0x1234123443214321 0x1122334455667788 +__m256i __lasx_xvshuf_d(__m256i{0x0000000000000000, 0x0000000000000003, 0x0000000000000002, 0x0000000000000001}, __m256i{0x1122334455667788, 0x99aabbccddeeff00, 0xabcdef1212341234, 0xaabbaabbddeeddee}, __m256i{0xabcdef1314156678, 0x1234123443214321, 0x1234123443214321, 0x5678567856785678}): 0xabcdef1314156678 0x99aabbccddeeff00 0xabcdef1212341234 0x5678567856785678 +__m128i __lsx_vshuf4i_b(__m128i{0xabcdef1314156678 COMMA 0x1234123443214321}, 0x12): 0x13ef13cd78667815 0x3412343421432121 +__m256i __lasx_xvshuf4i_b( __m256i{ 0xabcdef1314156678 COMMA 0x1234123443214321 COMMA 0x1234123443214321 COMMA 0x5678567856785678}, 0x12): 0x13ef13cd78667815 0x3412343421432121 0x3412343421432121 0x7856787878567878 +__m128i __lsx_vshuf4i_h(__m128i{0xabcdef1314156678 COMMA 0x1234123443214321}, 0x12): 0x667814156678ef13 0x4321432143211234 +__m256i __lasx_xvshuf4i_h( __m256i{ 0xabcdef1314156678 COMMA 0x1234123443214321 COMMA 0x1234123443214321 COMMA 0x5678567856785678}, 0x12): 0x667814156678ef13 0x4321432143211234 0x4321432143211234 0x5678567856785678 +__m128i __lsx_vshuf4i_w(__m128i{0xabcdef1314156678 COMMA 0x1234123443214321}, 0x12): 0x1415667843214321 0x14156678abcdef13 +__m256i __lasx_xvshuf4i_w( __m256i{ 0xabcdef1314156678 COMMA 0x1234123443214321 COMMA 0x1234123443214321 COMMA 0x5678567856785678}, 0x12): 0x1415667843214321 0x14156678abcdef13 0x4321432156785678 0x4321432112341234 +__m128i __lsx_vshuf4i_d(__m128i{0x1122334455667788 COMMA 0x99aabbccddeeff00}, __m128i{0xabcdef1314156678 COMMA 0x1234123443214321}, 0x12): 0xabcdef1314156678 0x1122334455667788 +__m256i __lasx_xvshuf4i_d( __m256i{ 0x1122334455667788 COMMA 0x99aabbccddeeff00 COMMA 0xabcdef1212341234 COMMA 0xaabbaabbddeeddee}, __m256i{ 0xabcdef1314156678 COMMA 0x1234123443214321 COMMA 
0x1234123443214321 COMMA 0x5678567856785678}, 0x12): 0xabcdef1314156678 0x1122334455667788 0x1234123443214321 0xabcdef1212341234