Skip to content

Commit 7172c68

Browse files
Accelerate additional cross platform hardware intrinsics (#61649)
* Updating Vector64/128/256.IsHardwareAccelerated to be treated as a constant and return true where supported * Accelerate the CmpOpAll intrinsics * Accelerate the CmpOpAny intrinsics * Accelerate the ConvertToDouble/Int32/Int64/Single/UInt32/UInt64 intrinsics * Applying formatting patch * Fixing ConvertToInt32 and ConvertToSingle to use the right intrinsic * Fixing some issues and asserting types are correct * Updating ConvertToDouble and ConvertToSingle to have correct vectorized versions on x86/x64 * Ensure Vector<T>.ConvertToDouble/Single are accelerated * Swap operands and invert immediate so the constant can be contained on blend * Restrict ConvertToDouble(Vector128<UInt64>) tests to inputs no more than long.MaxValue * Ensure that we create a long/ulong rather than a uint
1 parent 56ffec2 commit 7172c68

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+3188
-866
lines changed

src/coreclr/jit/codegenarm64.cpp

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3963,10 +3963,6 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode)
39633963
break;
39643964

39653965
case SIMDIntrinsicCast:
3966-
case SIMDIntrinsicConvertToSingle:
3967-
case SIMDIntrinsicConvertToInt32:
3968-
case SIMDIntrinsicConvertToDouble:
3969-
case SIMDIntrinsicConvertToInt64:
39703966
genSIMDIntrinsicUnOp(simdNode);
39713967
break;
39723968

@@ -4051,10 +4047,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
40514047
case SIMDIntrinsicCast:
40524048
result = INS_mov;
40534049
break;
4054-
case SIMDIntrinsicConvertToInt32:
4055-
case SIMDIntrinsicConvertToInt64:
4056-
result = INS_fcvtzs;
4057-
break;
40584050
case SIMDIntrinsicEqual:
40594051
result = INS_fcmeq;
40604052
break;
@@ -4081,10 +4073,6 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
40814073
case SIMDIntrinsicCast:
40824074
result = INS_mov;
40834075
break;
4084-
case SIMDIntrinsicConvertToDouble:
4085-
case SIMDIntrinsicConvertToSingle:
4086-
result = isUnsigned ? INS_ucvtf : INS_scvtf;
4087-
break;
40884076
case SIMDIntrinsicEqual:
40894077
result = INS_cmeq;
40904078
break;
@@ -4232,11 +4220,7 @@ void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode)
42324220
//
42334221
void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode)
42344222
{
4235-
assert((simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicCast) ||
4236-
(simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicConvertToSingle) ||
4237-
(simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicConvertToInt32) ||
4238-
(simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicConvertToDouble) ||
4239-
(simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicConvertToInt64));
4223+
assert(simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicCast);
42404224

42414225
GenTree* op1 = simdNode->Op(1);
42424226
var_types baseType = simdNode->GetSimdBaseType();

src/coreclr/jit/gentree.cpp

Lines changed: 145 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19273,21 +19273,59 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode(genTreeOps op,
1927319273

1927419274
NamedIntrinsic intrinsic = NI_Illegal;
1927519275

19276+
#if defined(TARGET_XARCH)
19277+
if (simdSize == 32)
19278+
{
19279+
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
19280+
assert(varTypeIsFloating(simdBaseType) || compIsaSupportedDebugOnly(InstructionSet_AVX2));
19281+
}
19282+
#endif // TARGET_XARCH
19283+
1927619284
switch (op)
1927719285
{
1927819286
#if defined(TARGET_XARCH)
1927919287
case GT_EQ:
1928019288
{
19289+
intrinsic = (simdSize == 32) ? NI_Vector256_op_Equality : NI_Vector128_op_Equality;
19290+
break;
19291+
}
19292+
19293+
case GT_GE:
19294+
case GT_GT:
19295+
case GT_LE:
19296+
case GT_LT:
19297+
{
19298+
// We want to generate a comparison along the lines of
19299+
// GT_XX(op1, op2).As<T, TInteger>() == Vector128<TInteger>.AllBitsSet
19300+
19301+
NamedIntrinsic getAllBitsSet = NI_Illegal;
19302+
1928119303
if (simdSize == 32)
1928219304
{
19283-
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
19284-
assert(varTypeIsFloating(simdBaseType) || compIsaSupportedDebugOnly(InstructionSet_AVX2));
19285-
intrinsic = NI_Vector256_op_Equality;
19305+
intrinsic = NI_Vector256_op_Equality;
19306+
getAllBitsSet = NI_Vector256_get_AllBitsSet;
1928619307
}
1928719308
else
1928819309
{
19289-
intrinsic = NI_Vector128_op_Equality;
19310+
intrinsic = NI_Vector128_op_Equality;
19311+
getAllBitsSet = NI_Vector128_get_AllBitsSet;
19312+
}
19313+
19314+
op1 = gtNewSimdCmpOpNode(op, simdBaseType, op1, op2, simdBaseJitType, simdSize,
19315+
/* isSimdAsHWIntrinsic */ false);
19316+
19317+
if (simdBaseType == TYP_FLOAT)
19318+
{
19319+
simdBaseType = TYP_INT;
19320+
simdBaseJitType = CORINFO_TYPE_INT;
19321+
}
19322+
else if (simdBaseType == TYP_DOUBLE)
19323+
{
19324+
simdBaseType = TYP_LONG;
19325+
simdBaseJitType = CORINFO_TYPE_LONG;
1929019326
}
19327+
19328+
op2 = gtNewSimdHWIntrinsicNode(simdBaseType, getAllBitsSet, simdBaseJitType, simdSize);
1929119329
break;
1929219330
}
1929319331
#elif defined(TARGET_ARM64)
@@ -19296,6 +19334,45 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode(genTreeOps op,
1929619334
intrinsic = (simdSize == 8) ? NI_Vector64_op_Equality : NI_Vector128_op_Equality;
1929719335
break;
1929819336
}
19337+
19338+
case GT_GE:
19339+
case GT_GT:
19340+
case GT_LE:
19341+
case GT_LT:
19342+
{
19343+
// We want to generate a comparison along the lines of
19344+
// GT_XX(op1, op2).As<T, TInteger>() == Vector128<TInteger>.AllBitsSet
19345+
19346+
NamedIntrinsic getAllBitsSet = NI_Illegal;
19347+
19348+
if (simdSize == 8)
19349+
{
19350+
intrinsic = NI_Vector64_op_Equality;
19351+
getAllBitsSet = NI_Vector64_get_AllBitsSet;
19352+
}
19353+
else
19354+
{
19355+
intrinsic = NI_Vector128_op_Equality;
19356+
getAllBitsSet = NI_Vector128_get_AllBitsSet;
19357+
}
19358+
19359+
op1 = gtNewSimdCmpOpNode(op, simdBaseType, op1, op2, simdBaseJitType, simdSize,
19360+
/* isSimdAsHWIntrinsic */ false);
19361+
19362+
if (simdBaseType == TYP_FLOAT)
19363+
{
19364+
simdBaseType = TYP_INT;
19365+
simdBaseJitType = CORINFO_TYPE_INT;
19366+
}
19367+
else if (simdBaseType == TYP_DOUBLE)
19368+
{
19369+
simdBaseType = TYP_LONG;
19370+
simdBaseJitType = CORINFO_TYPE_LONG;
19371+
}
19372+
19373+
op2 = gtNewSimdHWIntrinsicNode(simdBaseType, getAllBitsSet, simdBaseJitType, simdSize);
19374+
break;
19375+
}
1929919376
#else
1930019377
#error Unsupported platform
1930119378
#endif // !TARGET_XARCH && !TARGET_ARM64
@@ -19335,24 +19412,81 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode(genTreeOps op,
1933519412

1933619413
NamedIntrinsic intrinsic = NI_Illegal;
1933719414

19415+
#if defined(TARGET_XARCH)
19416+
if (simdSize == 32)
19417+
{
19418+
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
19419+
assert(varTypeIsFloating(simdBaseType) || compIsaSupportedDebugOnly(InstructionSet_AVX2));
19420+
}
19421+
#endif // TARGET_XARCH
19422+
1933819423
switch (op)
1933919424
{
1934019425
#if defined(TARGET_XARCH)
19341-
case GT_NE:
19426+
case GT_EQ:
19427+
case GT_GE:
19428+
case GT_GT:
19429+
case GT_LE:
19430+
case GT_LT:
1934219431
{
19343-
if (simdSize == 32)
19432+
// We want to generate a comparison along the lines of
19433+
// GT_XX(op1, op2).As<T, TInteger>() != Vector128<TInteger>.Zero
19434+
19435+
intrinsic = (simdSize == 32) ? NI_Vector256_op_Inequality : NI_Vector128_op_Inequality;
19436+
19437+
op1 = gtNewSimdCmpOpNode(op, simdBaseType, op1, op2, simdBaseJitType, simdSize,
19438+
/* isSimdAsHWIntrinsic */ false);
19439+
19440+
if (simdBaseType == TYP_FLOAT)
1934419441
{
19345-
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
19346-
assert(varTypeIsFloating(simdBaseType) || compIsaSupportedDebugOnly(InstructionSet_AVX2));
19347-
intrinsic = NI_Vector256_op_Inequality;
19442+
simdBaseType = TYP_INT;
19443+
simdBaseJitType = CORINFO_TYPE_INT;
1934819444
}
19349-
else
19445+
else if (simdBaseType == TYP_DOUBLE)
1935019446
{
19351-
intrinsic = NI_Vector128_op_Inequality;
19447+
simdBaseType = TYP_LONG;
19448+
simdBaseJitType = CORINFO_TYPE_LONG;
1935219449
}
19450+
19451+
op2 = gtNewSimdZeroNode(simdBaseType, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false);
19452+
break;
19453+
}
19454+
19455+
case GT_NE:
19456+
{
19457+
intrinsic = (simdSize == 32) ? NI_Vector256_op_Inequality : NI_Vector128_op_Inequality;
1935319458
break;
1935419459
}
1935519460
#elif defined(TARGET_ARM64)
19461+
case GT_EQ:
19462+
case GT_GE:
19463+
case GT_GT:
19464+
case GT_LE:
19465+
case GT_LT:
19466+
{
19467+
// We want to generate a comparison along the lines of
19468+
// GT_XX(op1, op2).As<T, TInteger>() != Vector128<TInteger>.Zero
19469+
19470+
intrinsic = (simdSize == 8) ? NI_Vector64_op_Inequality : NI_Vector128_op_Inequality;
19471+
19472+
op1 = gtNewSimdCmpOpNode(op, simdBaseType, op1, op2, simdBaseJitType, simdSize,
19473+
/* isSimdAsHWIntrinsic */ false);
19474+
19475+
if (simdBaseType == TYP_FLOAT)
19476+
{
19477+
simdBaseType = TYP_INT;
19478+
simdBaseJitType = CORINFO_TYPE_INT;
19479+
}
19480+
else if (simdBaseType == TYP_DOUBLE)
19481+
{
19482+
simdBaseType = TYP_LONG;
19483+
simdBaseJitType = CORINFO_TYPE_LONG;
19484+
}
19485+
19486+
op2 = gtNewSimdZeroNode(simdBaseType, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false);
19487+
break;
19488+
}
19489+
1935619490
case GT_NE:
1935719491
{
1935819492
intrinsic = (simdSize == 8) ? NI_Vector64_op_Inequality : NI_Vector128_op_Inequality;

src/coreclr/jit/hwintrinsic.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp,
285285

286286
bool isIsaSupported = comp->compHWIntrinsicDependsOn(isa) && comp->compSupportsHWIntrinsic(isa);
287287

288-
if (strcmp(methodName, "get_IsSupported") == 0)
288+
if ((strcmp(methodName, "get_IsSupported") == 0) || (strcmp(methodName, "get_IsHardwareAccelerated") == 0))
289289
{
290290
return isIsaSupported ? (comp->compExactlyDependsOn(isa) ? NI_IsSupported_True : NI_IsSupported_Dynamic)
291291
: NI_IsSupported_False;

0 commit comments

Comments
 (0)