mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-12 13:30:51 +00:00
Formalize the notion that AVX and SSE are non-overlapping extensions from the compiler's point of view. Per email discussion, we either want to always use VEX-prefixed instructions or never use them, and are taking "HasAVX" to mean "Always use VEX". Passing -mattr=-avx,+sse42 should serve to restore legacy SSE support when desirable.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@121439 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
1c952b9cc9
commit
2ea8ee7c76
@ -116,11 +116,11 @@ def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
|
||||
FeatureFastUAMem]>;
|
||||
// Westmere is a similar machine to nehalem with some additional features.
|
||||
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
|
||||
def : Proc<"westmere", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
|
||||
FeatureFastUAMem, FeatureAES]>;
|
||||
// Sandy Bridge does not have FMA
|
||||
// FIXME: Wikipedia says it does... it should have AES as well.
|
||||
def : Proc<"sandybridge", [FeatureSSE42, FeatureAVX, Feature64Bit]>;
|
||||
def : Proc<"westmere", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
|
||||
FeatureFastUAMem, FeatureAES, FeatureCLMUL]>;
|
||||
// SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
|
||||
// rather than a superset.
|
||||
def : Proc<"sandybridge", [FeatureAVX, FeatureAES, FeatureCLMUL, Feature64Bit]>;
|
||||
|
||||
def : Proc<"k6", [FeatureMMX]>;
|
||||
def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>;
|
||||
|
@ -61,7 +61,7 @@ def RetCC_X86_32_C : CallingConv<[
|
||||
// weirdly; this is really the sse-regparm calling convention) in which
|
||||
// case they use XMM0, otherwise it is the same as the common X86 calling
|
||||
// conv.
|
||||
CCIfInReg<CCIfSubtarget<"hasSSE2()",
|
||||
CCIfInReg<CCIfSubtarget<"hasXMMInt()",
|
||||
CCIfType<[f32, f64], CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
|
||||
CCIfType<[f32,f64], CCAssignToReg<[ST0, ST1]>>,
|
||||
CCDelegateTo<RetCC_X86Common>
|
||||
@ -73,8 +73,8 @@ def RetCC_X86_32_Fast : CallingConv<[
|
||||
// SSE2.
|
||||
// This can happen when a float, 2 x float, or 3 x float vector is split by
|
||||
// target lowering, and is returned in 1-3 sse regs.
|
||||
CCIfType<[f32], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
|
||||
CCIfType<[f64], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
|
||||
CCIfType<[f32], CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
|
||||
CCIfType<[f64], CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
|
||||
|
||||
// For integers, ECX can be used as an extra return register
|
||||
CCIfType<[i8], CCAssignToReg<[AL, DL, CL]>>,
|
||||
@ -163,12 +163,12 @@ def CC_X86_64_C : CallingConv<[
|
||||
// registers on Darwin.
|
||||
CCIfType<[x86mmx],
|
||||
CCIfSubtarget<"isTargetDarwin()",
|
||||
CCIfSubtarget<"hasSSE2()",
|
||||
CCIfSubtarget<"hasXMMInt()",
|
||||
CCPromoteToType<v2i64>>>>,
|
||||
|
||||
// The first 8 FP/Vector arguments are passed in XMM registers.
|
||||
CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
|
||||
CCIfSubtarget<"hasSSE1()",
|
||||
CCIfSubtarget<"hasXMM()",
|
||||
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
|
||||
|
||||
// The first 8 256-bit vector arguments are passed in YMM registers.
|
||||
@ -245,7 +245,7 @@ def CC_X86_64_GHC : CallingConv<[
|
||||
|
||||
// Pass in STG registers: F1, F2, F3, F4, D1, D2
|
||||
CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
|
||||
CCIfSubtarget<"hasSSE1()",
|
||||
CCIfSubtarget<"hasXMM()",
|
||||
CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>>
|
||||
]>;
|
||||
|
||||
@ -263,7 +263,7 @@ def CC_X86_32_Common : CallingConv<[
|
||||
// The first 3 float or double arguments, if marked 'inreg' and if the call
|
||||
// is not a vararg call and if SSE2 is available, are passed in SSE registers.
|
||||
CCIfNotVarArg<CCIfInReg<CCIfType<[f32,f64],
|
||||
CCIfSubtarget<"hasSSE2()",
|
||||
CCIfSubtarget<"hasXMMInt()",
|
||||
CCAssignToReg<[XMM0,XMM1,XMM2]>>>>>,
|
||||
|
||||
// The first 3 __m64 (except for v1i64) vector arguments are passed in mmx
|
||||
@ -362,7 +362,7 @@ def CC_X86_32_FastCC : CallingConv<[
|
||||
// The first 3 float or double arguments, if the call is not a vararg
|
||||
// call and if SSE2 is available, are passed in SSE registers.
|
||||
CCIfNotVarArg<CCIfType<[f32,f64],
|
||||
CCIfSubtarget<"hasSSE2()",
|
||||
CCIfSubtarget<"hasXMMInt()",
|
||||
CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
|
||||
|
||||
// Doubles get 8-byte slots that are 8-byte aligned.
|
||||
|
@ -81,8 +81,8 @@ static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
|
||||
X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||
: TargetLowering(TM, createTLOF(TM)) {
|
||||
Subtarget = &TM.getSubtarget<X86Subtarget>();
|
||||
X86ScalarSSEf64 = Subtarget->hasSSE2();
|
||||
X86ScalarSSEf32 = Subtarget->hasSSE1();
|
||||
X86ScalarSSEf64 = Subtarget->hasXMMInt();
|
||||
X86ScalarSSEf32 = Subtarget->hasXMM();
|
||||
X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
|
||||
|
||||
RegInfo = TM.getRegisterInfo();
|
||||
@ -356,7 +356,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||
setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom);
|
||||
}
|
||||
|
||||
if (Subtarget->hasSSE1())
|
||||
if (Subtarget->hasXMM())
|
||||
setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
|
||||
|
||||
// We may not have a libcall for MEMBARRIER so we should lower this.
|
||||
@ -664,7 +664,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||
setOperationAction(ISD::BITCAST, MVT::v2i32, Expand);
|
||||
setOperationAction(ISD::BITCAST, MVT::v1i64, Expand);
|
||||
|
||||
if (!UseSoftFloat && Subtarget->hasSSE1()) {
|
||||
if (!UseSoftFloat && Subtarget->hasXMM()) {
|
||||
addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
|
||||
|
||||
setOperationAction(ISD::FADD, MVT::v4f32, Legal);
|
||||
@ -681,7 +681,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||
setOperationAction(ISD::VSETCC, MVT::v4f32, Custom);
|
||||
}
|
||||
|
||||
if (!UseSoftFloat && Subtarget->hasSSE2()) {
|
||||
if (!UseSoftFloat && Subtarget->hasXMMInt()) {
|
||||
addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
|
||||
|
||||
// FIXME: Unfortunately -soft-float and -no-implicit-float means XMM
|
||||
@ -1043,7 +1043,7 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
|
||||
}
|
||||
|
||||
unsigned Align = 4;
|
||||
if (Subtarget->hasSSE1())
|
||||
if (Subtarget->hasXMM())
|
||||
getMaxByValAlign(Ty, Align);
|
||||
return Align;
|
||||
}
|
||||
@ -1084,7 +1084,7 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
|
||||
} else if (!MemcpyStrSrc && Size >= 8 &&
|
||||
!Subtarget->is64Bit() &&
|
||||
Subtarget->getStackAlignment() >= 8 &&
|
||||
Subtarget->hasSSE2()) {
|
||||
Subtarget->hasXMMInt()) {
|
||||
// Do not use f64 to lower memcpy if source is string constant. It's
|
||||
// better to use i32 to avoid the loads.
|
||||
return MVT::f64;
|
||||
@ -1272,14 +1272,14 @@ X86TargetLowering::LowerReturn(SDValue Chain,
|
||||
// or SSE or MMX vectors.
|
||||
if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
|
||||
VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
|
||||
(Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
|
||||
(Subtarget->is64Bit() && !Subtarget->hasXMM())) {
|
||||
report_fatal_error("SSE register return with SSE disabled");
|
||||
}
|
||||
// Likewise we can't return F64 values with SSE1 only. gcc does so, but
|
||||
// llvm-gcc has never done it right and no one has noticed, so this
|
||||
// should be OK for now.
|
||||
if (ValVT == MVT::f64 &&
|
||||
(Subtarget->is64Bit() && !Subtarget->hasSSE2()))
|
||||
(Subtarget->is64Bit() && !Subtarget->hasXMMInt()))
|
||||
report_fatal_error("SSE2 register return with SSE2 disabled");
|
||||
|
||||
// Returns in ST0/ST1 are handled specially: these are pushed as operands to
|
||||
@ -1391,7 +1391,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
|
||||
|
||||
// If this is x86-64, and we disabled SSE, we can't return FP values
|
||||
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
|
||||
((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
|
||||
((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasXMM())) {
|
||||
report_fatal_error("SSE register return with SSE disabled");
|
||||
}
|
||||
|
||||
@ -1700,11 +1700,11 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
|
||||
TotalNumIntRegs);
|
||||
|
||||
bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
|
||||
assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
|
||||
assert(!(NumXMMRegs && !Subtarget->hasXMM()) &&
|
||||
"SSE register cannot be used when SSE is disabled!");
|
||||
assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloatOps) &&
|
||||
"SSE register cannot be used when SSE is disabled!");
|
||||
if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasSSE1())
|
||||
if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasXMM())
|
||||
// Kernel mode asks for SSE to be disabled, so don't push them
|
||||
// on the stack.
|
||||
TotalNumXMMRegs = 0;
|
||||
@ -2055,7 +2055,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
|
||||
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
|
||||
};
|
||||
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
|
||||
assert((Subtarget->hasSSE1() || !NumXMMRegs)
|
||||
assert((Subtarget->hasXMM() || !NumXMMRegs)
|
||||
&& "SSE registers cannot be used when SSE is disabled");
|
||||
|
||||
Chain = DAG.getCopyToReg(Chain, dl, X86::AL,
|
||||
@ -7635,7 +7635,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
|
||||
assert(!UseSoftFloat &&
|
||||
!(DAG.getMachineFunction()
|
||||
.getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) &&
|
||||
Subtarget->hasSSE1());
|
||||
Subtarget->hasXMM());
|
||||
}
|
||||
|
||||
// Insert VAARG_64 node into the DAG
|
||||
@ -11689,7 +11689,7 @@ TargetLowering::ConstraintWeight
|
||||
break;
|
||||
case 'x':
|
||||
case 'Y':
|
||||
if ((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasSSE1())
|
||||
if ((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasXMM())
|
||||
weight = CW_Register;
|
||||
break;
|
||||
case 'I':
|
||||
@ -11759,9 +11759,9 @@ LowerXConstraint(EVT ConstraintVT) const {
|
||||
// FP X constraints get lowered to SSE1/2 registers if available, otherwise
|
||||
// 'f' like normal targets.
|
||||
if (ConstraintVT.isFloatingPoint()) {
|
||||
if (Subtarget->hasSSE2())
|
||||
if (Subtarget->hasXMMInt())
|
||||
return "Y";
|
||||
if (Subtarget->hasSSE1())
|
||||
if (Subtarget->hasXMM())
|
||||
return "x";
|
||||
}
|
||||
|
||||
@ -11991,10 +11991,10 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
|
||||
if (!Subtarget->hasMMX()) break;
|
||||
return std::make_pair(0U, X86::VR64RegisterClass);
|
||||
case 'Y': // SSE_REGS if SSE2 allowed
|
||||
if (!Subtarget->hasSSE2()) break;
|
||||
if (!Subtarget->hasXMMInt()) break;
|
||||
// FALL THROUGH.
|
||||
case 'x': // SSE_REGS if SSE1 allowed
|
||||
if (!Subtarget->hasSSE1()) break;
|
||||
if (!Subtarget->hasXMM()) break;
|
||||
|
||||
switch (VT.getSimpleVT().SimpleTy) {
|
||||
default: break;
|
||||
|
@ -400,26 +400,26 @@ def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
|
||||
def HasCMov : Predicate<"Subtarget->hasCMov()">;
|
||||
def NoCMov : Predicate<"!Subtarget->hasCMov()">;
|
||||
|
||||
// FIXME: temporary hack to let codegen assert or generate poor code in case
|
||||
// no AVX version of the desired instructions is present, this is better for
|
||||
// incremental dev (without fallbacks it's easier to spot what's missing)
|
||||
def HasMMX : Predicate<"Subtarget->hasMMX() && !Subtarget->hasAVX()">;
|
||||
def HasMMX : Predicate<"Subtarget->hasMMX()">;
|
||||
def Has3DNow : Predicate<"Subtarget->has3DNow()">;
|
||||
def Has3DNowA : Predicate<"Subtarget->has3DNowA()">;
|
||||
def HasSSE1 : Predicate<"Subtarget->hasSSE1() && !Subtarget->hasAVX()">;
|
||||
def HasSSE2 : Predicate<"Subtarget->hasSSE2() && !Subtarget->hasAVX()">;
|
||||
def HasSSE3 : Predicate<"Subtarget->hasSSE3() && !Subtarget->hasAVX()">;
|
||||
def HasSSSE3 : Predicate<"Subtarget->hasSSSE3() && !Subtarget->hasAVX()">;
|
||||
def HasSSE41 : Predicate<"Subtarget->hasSSE41() && !Subtarget->hasAVX()">;
|
||||
def HasSSE42 : Predicate<"Subtarget->hasSSE42() && !Subtarget->hasAVX()">;
|
||||
def HasSSE4A : Predicate<"Subtarget->hasSSE4A() && !Subtarget->hasAVX()">;
|
||||
def HasSSE1 : Predicate<"Subtarget->hasSSE1()">;
|
||||
def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
|
||||
def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
|
||||
def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
|
||||
def HasSSE41 : Predicate<"Subtarget->hasSSE41()">;
|
||||
def HasSSE42 : Predicate<"Subtarget->hasSSE42()">;
|
||||
def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
|
||||
|
||||
def HasAVX : Predicate<"Subtarget->hasAVX()">;
|
||||
def HasXMMInt : Predicate<"Subtarget->hasXMMInt()">;
|
||||
|
||||
def HasAES : Predicate<"Subtarget->hasAES()">;
|
||||
def HasCLMUL : Predicate<"Subtarget->hasCLMUL()">;
|
||||
def HasFMA3 : Predicate<"Subtarget->hasFMA3()">;
|
||||
def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
|
||||
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
|
||||
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
|
||||
def FPStackf32 : Predicate<"!Subtarget->hasXMM()">;
|
||||
def FPStackf64 : Predicate<"!Subtarget->hasXMMInt()">;
|
||||
def In32BitMode : Predicate<"!Subtarget->is64Bit()">, AssemblerPredicate;
|
||||
def In64BitMode : Predicate<"Subtarget->is64Bit()">, AssemblerPredicate;
|
||||
def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
|
||||
@ -436,7 +436,6 @@ def OptForSize : Predicate<"OptForSize">;
|
||||
def OptForSpeed : Predicate<"!OptForSize">;
|
||||
def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
|
||||
def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
|
||||
def HasAES : Predicate<"Subtarget->hasAES()">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// X86 Instruction Format Definitions.
|
||||
|
@ -712,6 +712,8 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
|
||||
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[]>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V;
|
||||
}
|
||||
// Select the VEX-encoded scalar f64 -> f32 conversion for fround, but only on
// AVX subtargets. Without a predicate this pattern would also be eligible when
// only SSE2 is enabled, where a VEX-prefixed instruction must not be emitted
// (the legacy CVTSD2SSrr pattern handles that case). $src is supplied for both
// source operands of the instruction.
def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
      Requires<[HasAVX]>;
|
||||
|
||||
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
|
||||
"cvtsd2ss\t{$src, $dst|$dst, $src}",
|
||||
[(set FR32:$dst, (fround FR64:$src))]>;
|
||||
@ -739,6 +741,8 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
|
||||
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[]>, XS, VEX_4V, Requires<[HasAVX, OptForSize]>;
|
||||
}
|
||||
// Select the VEX-encoded scalar f32 -> f64 conversion for fextend, but only on
// AVX subtargets. Without a predicate this pattern would also be eligible when
// only SSE2 is enabled, where a VEX-prefixed instruction must not be emitted
// (the legacy CVTSS2SDrr pattern handles that case). $src is supplied for both
// source operands of the instruction.
def : Pat<(f64 (fextend FR32:$src)), (VCVTSS2SDrr FR32:$src, FR32:$src)>,
      Requires<[HasAVX]>;
|
||||
|
||||
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
|
||||
"cvtss2sd\t{$src, $dst|$dst, $src}",
|
||||
[(set FR64:$dst, (fextend FR32:$src))]>, XS,
|
||||
@ -3680,7 +3684,7 @@ let Predicates = [HasSSE2] in
|
||||
(CVTSS2SDrm addr:$src)>;
|
||||
|
||||
// bit_convert
|
||||
let Predicates = [HasSSE2] in {
|
||||
let Predicates = [HasXMMInt] in {
|
||||
def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
|
||||
def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
|
||||
def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
|
||||
@ -3713,6 +3717,10 @@ let Predicates = [HasSSE2] in {
|
||||
def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
|
||||
}
|
||||
|
||||
// Move scalar to XMM zero-extended
|
||||
// movd to XMM register zero-extends
|
||||
let AddedComplexity = 15 in {
|
||||
|
@ -256,13 +256,13 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
|
||||
if ((ECX >> 9) & 1) X86SSELevel = SSSE3;
|
||||
if ((ECX >> 19) & 1) X86SSELevel = SSE41;
|
||||
if ((ECX >> 20) & 1) X86SSELevel = SSE42;
|
||||
if ((ECX >> 28) & 1) { HasAVX = true; X86SSELevel = NoMMXSSE; }
|
||||
|
||||
bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
|
||||
bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
|
||||
|
||||
HasCLMUL = IsIntel && ((ECX >> 1) & 0x1);
|
||||
HasFMA3 = IsIntel && ((ECX >> 12) & 0x1);
|
||||
HasAVX = ((ECX >> 28) & 0x1);
|
||||
HasAES = IsIntel && ((ECX >> 25) & 0x1);
|
||||
|
||||
if (IsIntel || IsAMD) {
|
||||
@ -316,11 +316,13 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
|
||||
ParseSubtargetFeatures(FS, CPU);
|
||||
// All X86-64 CPUs also have SSE2, however user might request no SSE via
|
||||
// -mattr, so don't force SSELevel here.
|
||||
if (HasAVX)
|
||||
X86SSELevel = NoMMXSSE;
|
||||
} else {
|
||||
// Otherwise, use CPUID to auto-detect feature set.
|
||||
AutoDetectSubtargetFeatures();
|
||||
// Make sure SSE2 is enabled; it is available on all X86-64 CPUs.
|
||||
if (Is64Bit && X86SSELevel < SSE2)
|
||||
if (Is64Bit && !HasAVX && X86SSELevel < SSE2)
|
||||
X86SSELevel = SSE2;
|
||||
}
|
||||
|
||||
|
@ -155,6 +155,8 @@ public:
|
||||
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
|
||||
bool hasPOPCNT() const { return HasPOPCNT; }
|
||||
bool hasAVX() const { return HasAVX; }
|
||||
/// hasXMM - True when SSE1 is available or, failing that, when AVX is
/// enabled. Callers use this instead of hasSSE1() so the answer stays true
/// on subtargets where AVX is enabled.
bool hasXMM() const {
  if (hasSSE1())
    return true;
  return hasAVX();
}
|
||||
/// hasXMMInt - True when SSE2 is available or, failing that, when AVX is
/// enabled. Callers use this instead of hasSSE2() so the answer stays true
/// on subtargets where AVX is enabled.
bool hasXMMInt() const {
  if (hasSSE2())
    return true;
  return hasAVX();
}
|
||||
bool hasAES() const { return HasAES; }
|
||||
bool hasCLMUL() const { return HasCLMUL; }
|
||||
bool hasFMA3() const { return HasFMA3; }
|
||||
|
@ -4,7 +4,7 @@
|
||||
|
||||
define void @zero() nounwind ssp {
|
||||
entry:
|
||||
; CHECK: vpxor
|
||||
; CHECK: vxorps
|
||||
; CHECK: vmovaps
|
||||
store <4 x float> zeroinitializer, <4 x float>* @z, align 16
|
||||
ret void
|
||||
|
Loading…
Reference in New Issue
Block a user