Formalize the notion that AVX and SSE are non-overlapping extensions from the compiler's point of view. Per email discussion, we either want to always use VEX-prefixed instructions or never use them, and are taking "HasAVX" to mean "Always use VEX". Passing -mattr=-avx,+sse42 should serve to restore legacy SSE support when desirable.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@121439 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Nate Begeman 2010-12-10 00:26:57 +00:00
parent 1c952b9cc9
commit 2ea8ee7c76
8 changed files with 61 additions and 50 deletions

View File

@ -116,11 +116,11 @@ def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
FeatureFastUAMem]>; FeatureFastUAMem]>;
// Westmere is a similar machine to nehalem with some additional features. // Westmere is a similar machine to nehalem with some additional features.
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge // Westmere is the corei3/i5/i7 path from nehalem to sandybridge
def : Proc<"westmere", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem, def : Proc<"westmere", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
FeatureFastUAMem, FeatureAES]>; FeatureFastUAMem, FeatureAES, FeatureCLMUL]>;
// Sandy Bridge does not have FMA // SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
// FIXME: Wikipedia says it does... it should have AES as well. // rather than a superset.
def : Proc<"sandybridge", [FeatureSSE42, FeatureAVX, Feature64Bit]>; def : Proc<"sandybridge", [FeatureAVX, FeatureAES, FeatureCLMUL, Feature64Bit]>;
def : Proc<"k6", [FeatureMMX]>; def : Proc<"k6", [FeatureMMX]>;
def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>; def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>;

View File

@ -61,7 +61,7 @@ def RetCC_X86_32_C : CallingConv<[
// weirdly; this is really the sse-regparm calling convention) in which // weirdly; this is really the sse-regparm calling convention) in which
// case they use XMM0, otherwise it is the same as the common X86 calling // case they use XMM0, otherwise it is the same as the common X86 calling
// conv. // conv.
CCIfInReg<CCIfSubtarget<"hasSSE2()", CCIfInReg<CCIfSubtarget<"hasXMMInt()",
CCIfType<[f32, f64], CCAssignToReg<[XMM0,XMM1,XMM2]>>>>, CCIfType<[f32, f64], CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
CCIfType<[f32,f64], CCAssignToReg<[ST0, ST1]>>, CCIfType<[f32,f64], CCAssignToReg<[ST0, ST1]>>,
CCDelegateTo<RetCC_X86Common> CCDelegateTo<RetCC_X86Common>
@ -73,8 +73,8 @@ def RetCC_X86_32_Fast : CallingConv<[
// SSE2. // SSE2.
// This can happen when a float, 2 x float, or 3 x float vector is split by // This can happen when a float, 2 x float, or 3 x float vector is split by
// target lowering, and is returned in 1-3 sse regs. // target lowering, and is returned in 1-3 sse regs.
CCIfType<[f32], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>, CCIfType<[f32], CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
CCIfType<[f64], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>, CCIfType<[f64], CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
// For integers, ECX can be used as an extra return register // For integers, ECX can be used as an extra return register
CCIfType<[i8], CCAssignToReg<[AL, DL, CL]>>, CCIfType<[i8], CCAssignToReg<[AL, DL, CL]>>,
@ -163,12 +163,12 @@ def CC_X86_64_C : CallingConv<[
// registers on Darwin. // registers on Darwin.
CCIfType<[x86mmx], CCIfType<[x86mmx],
CCIfSubtarget<"isTargetDarwin()", CCIfSubtarget<"isTargetDarwin()",
CCIfSubtarget<"hasSSE2()", CCIfSubtarget<"hasXMMInt()",
CCPromoteToType<v2i64>>>>, CCPromoteToType<v2i64>>>>,
// The first 8 FP/Vector arguments are passed in XMM registers. // The first 8 FP/Vector arguments are passed in XMM registers.
CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCIfSubtarget<"hasSSE1()", CCIfSubtarget<"hasXMM()",
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>, CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
// The first 8 256-bit vector arguments are passed in YMM registers. // The first 8 256-bit vector arguments are passed in YMM registers.
@ -245,7 +245,7 @@ def CC_X86_64_GHC : CallingConv<[
// Pass in STG registers: F1, F2, F3, F4, D1, D2 // Pass in STG registers: F1, F2, F3, F4, D1, D2
CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCIfSubtarget<"hasSSE1()", CCIfSubtarget<"hasXMM()",
CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>> CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>>
]>; ]>;
@ -263,7 +263,7 @@ def CC_X86_32_Common : CallingConv<[
// The first 3 float or double arguments, if marked 'inreg' and if the call // The first 3 float or double arguments, if marked 'inreg' and if the call
// is not a vararg call and if SSE2 is available, are passed in SSE registers. // is not a vararg call and if SSE2 is available, are passed in SSE registers.
CCIfNotVarArg<CCIfInReg<CCIfType<[f32,f64], CCIfNotVarArg<CCIfInReg<CCIfType<[f32,f64],
CCIfSubtarget<"hasSSE2()", CCIfSubtarget<"hasXMMInt()",
CCAssignToReg<[XMM0,XMM1,XMM2]>>>>>, CCAssignToReg<[XMM0,XMM1,XMM2]>>>>>,
// The first 3 __m64 (except for v1i64) vector arguments are passed in mmx // The first 3 __m64 (except for v1i64) vector arguments are passed in mmx
@ -362,7 +362,7 @@ def CC_X86_32_FastCC : CallingConv<[
// The first 3 float or double arguments, if the call is not a vararg // The first 3 float or double arguments, if the call is not a vararg
// call and if SSE2 is available, are passed in SSE registers. // call and if SSE2 is available, are passed in SSE registers.
CCIfNotVarArg<CCIfType<[f32,f64], CCIfNotVarArg<CCIfType<[f32,f64],
CCIfSubtarget<"hasSSE2()", CCIfSubtarget<"hasXMMInt()",
CCAssignToReg<[XMM0,XMM1,XMM2]>>>>, CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
// Doubles get 8-byte slots that are 8-byte aligned. // Doubles get 8-byte slots that are 8-byte aligned.

View File

@ -81,8 +81,8 @@ static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
: TargetLowering(TM, createTLOF(TM)) { : TargetLowering(TM, createTLOF(TM)) {
Subtarget = &TM.getSubtarget<X86Subtarget>(); Subtarget = &TM.getSubtarget<X86Subtarget>();
X86ScalarSSEf64 = Subtarget->hasSSE2(); X86ScalarSSEf64 = Subtarget->hasXMMInt();
X86ScalarSSEf32 = Subtarget->hasSSE1(); X86ScalarSSEf32 = Subtarget->hasXMM();
X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP; X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
RegInfo = TM.getRegisterInfo(); RegInfo = TM.getRegisterInfo();
@ -356,7 +356,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom); setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom);
} }
if (Subtarget->hasSSE1()) if (Subtarget->hasXMM())
setOperationAction(ISD::PREFETCH , MVT::Other, Legal); setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
// We may not have a libcall for MEMBARRIER so we should lower this. // We may not have a libcall for MEMBARRIER so we should lower this.
@ -664,7 +664,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::BITCAST, MVT::v2i32, Expand); setOperationAction(ISD::BITCAST, MVT::v2i32, Expand);
setOperationAction(ISD::BITCAST, MVT::v1i64, Expand); setOperationAction(ISD::BITCAST, MVT::v1i64, Expand);
if (!UseSoftFloat && Subtarget->hasSSE1()) { if (!UseSoftFloat && Subtarget->hasXMM()) {
addRegisterClass(MVT::v4f32, X86::VR128RegisterClass); addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
setOperationAction(ISD::FADD, MVT::v4f32, Legal); setOperationAction(ISD::FADD, MVT::v4f32, Legal);
@ -681,7 +681,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::VSETCC, MVT::v4f32, Custom); setOperationAction(ISD::VSETCC, MVT::v4f32, Custom);
} }
if (!UseSoftFloat && Subtarget->hasSSE2()) { if (!UseSoftFloat && Subtarget->hasXMMInt()) {
addRegisterClass(MVT::v2f64, X86::VR128RegisterClass); addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
// FIXME: Unfortunately -soft-float and -no-implicit-float means XMM // FIXME: Unfortunately -soft-float and -no-implicit-float means XMM
@ -1043,7 +1043,7 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
} }
unsigned Align = 4; unsigned Align = 4;
if (Subtarget->hasSSE1()) if (Subtarget->hasXMM())
getMaxByValAlign(Ty, Align); getMaxByValAlign(Ty, Align);
return Align; return Align;
} }
@ -1084,7 +1084,7 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
} else if (!MemcpyStrSrc && Size >= 8 && } else if (!MemcpyStrSrc && Size >= 8 &&
!Subtarget->is64Bit() && !Subtarget->is64Bit() &&
Subtarget->getStackAlignment() >= 8 && Subtarget->getStackAlignment() >= 8 &&
Subtarget->hasSSE2()) { Subtarget->hasXMMInt()) {
// Do not use f64 to lower memcpy if source is string constant. It's // Do not use f64 to lower memcpy if source is string constant. It's
// better to use i32 to avoid the loads. // better to use i32 to avoid the loads.
return MVT::f64; return MVT::f64;
@ -1272,14 +1272,14 @@ X86TargetLowering::LowerReturn(SDValue Chain,
// or SSE or MMX vectors. // or SSE or MMX vectors.
if ((ValVT == MVT::f32 || ValVT == MVT::f64 || if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) && VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
(Subtarget->is64Bit() && !Subtarget->hasSSE1())) { (Subtarget->is64Bit() && !Subtarget->hasXMM())) {
report_fatal_error("SSE register return with SSE disabled"); report_fatal_error("SSE register return with SSE disabled");
} }
// Likewise we can't return F64 values with SSE1 only. gcc does so, but // Likewise we can't return F64 values with SSE1 only. gcc does so, but
// llvm-gcc has never done it right and no one has noticed, so this // llvm-gcc has never done it right and no one has noticed, so this
// should be OK for now. // should be OK for now.
if (ValVT == MVT::f64 && if (ValVT == MVT::f64 &&
(Subtarget->is64Bit() && !Subtarget->hasSSE2())) (Subtarget->is64Bit() && !Subtarget->hasXMMInt()))
report_fatal_error("SSE2 register return with SSE2 disabled"); report_fatal_error("SSE2 register return with SSE2 disabled");
// Returns in ST0/ST1 are handled specially: these are pushed as operands to // Returns in ST0/ST1 are handled specially: these are pushed as operands to
@ -1391,7 +1391,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
// If this is x86-64, and we disabled SSE, we can't return FP values // If this is x86-64, and we disabled SSE, we can't return FP values
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) && if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) { ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasXMM())) {
report_fatal_error("SSE register return with SSE disabled"); report_fatal_error("SSE register return with SSE disabled");
} }
@ -1700,11 +1700,11 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
TotalNumIntRegs); TotalNumIntRegs);
bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat); bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
assert(!(NumXMMRegs && !Subtarget->hasSSE1()) && assert(!(NumXMMRegs && !Subtarget->hasXMM()) &&
"SSE register cannot be used when SSE is disabled!"); "SSE register cannot be used when SSE is disabled!");
assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloatOps) && assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloatOps) &&
"SSE register cannot be used when SSE is disabled!"); "SSE register cannot be used when SSE is disabled!");
if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasSSE1()) if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasXMM())
// Kernel mode asks for SSE to be disabled, so don't push them // Kernel mode asks for SSE to be disabled, so don't push them
// on the stack. // on the stack.
TotalNumXMMRegs = 0; TotalNumXMMRegs = 0;
@ -2055,7 +2055,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
}; };
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8); unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
assert((Subtarget->hasSSE1() || !NumXMMRegs) assert((Subtarget->hasXMM() || !NumXMMRegs)
&& "SSE registers cannot be used when SSE is disabled"); && "SSE registers cannot be used when SSE is disabled");
Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Chain = DAG.getCopyToReg(Chain, dl, X86::AL,
@ -7635,7 +7635,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
assert(!UseSoftFloat && assert(!UseSoftFloat &&
!(DAG.getMachineFunction() !(DAG.getMachineFunction()
.getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) && .getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) &&
Subtarget->hasSSE1()); Subtarget->hasXMM());
} }
// Insert VAARG_64 node into the DAG // Insert VAARG_64 node into the DAG
@ -11689,7 +11689,7 @@ TargetLowering::ConstraintWeight
break; break;
case 'x': case 'x':
case 'Y': case 'Y':
if ((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasSSE1()) if ((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasXMM())
weight = CW_Register; weight = CW_Register;
break; break;
case 'I': case 'I':
@ -11759,9 +11759,9 @@ LowerXConstraint(EVT ConstraintVT) const {
// FP X constraints get lowered to SSE1/2 registers if available, otherwise // FP X constraints get lowered to SSE1/2 registers if available, otherwise
// 'f' like normal targets. // 'f' like normal targets.
if (ConstraintVT.isFloatingPoint()) { if (ConstraintVT.isFloatingPoint()) {
if (Subtarget->hasSSE2()) if (Subtarget->hasXMMInt())
return "Y"; return "Y";
if (Subtarget->hasSSE1()) if (Subtarget->hasXMM())
return "x"; return "x";
} }
@ -11991,10 +11991,10 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
if (!Subtarget->hasMMX()) break; if (!Subtarget->hasMMX()) break;
return std::make_pair(0U, X86::VR64RegisterClass); return std::make_pair(0U, X86::VR64RegisterClass);
case 'Y': // SSE_REGS if SSE2 allowed case 'Y': // SSE_REGS if SSE2 allowed
if (!Subtarget->hasSSE2()) break; if (!Subtarget->hasXMMInt()) break;
// FALL THROUGH. // FALL THROUGH.
case 'x': // SSE_REGS if SSE1 allowed case 'x': // SSE_REGS if SSE1 allowed
if (!Subtarget->hasSSE1()) break; if (!Subtarget->hasXMM()) break;
switch (VT.getSimpleVT().SimpleTy) { switch (VT.getSimpleVT().SimpleTy) {
default: break; default: break;

View File

@ -400,26 +400,26 @@ def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
def HasCMov : Predicate<"Subtarget->hasCMov()">; def HasCMov : Predicate<"Subtarget->hasCMov()">;
def NoCMov : Predicate<"!Subtarget->hasCMov()">; def NoCMov : Predicate<"!Subtarget->hasCMov()">;
// FIXME: temporary hack to let codegen assert or generate poor code in case def HasMMX : Predicate<"Subtarget->hasMMX()">;
// no AVX version of the desired instructions is present, this is better for
// incremental dev (without fallbacks it's easier to spot what's missing)
def HasMMX : Predicate<"Subtarget->hasMMX() && !Subtarget->hasAVX()">;
def Has3DNow : Predicate<"Subtarget->has3DNow()">; def Has3DNow : Predicate<"Subtarget->has3DNow()">;
def Has3DNowA : Predicate<"Subtarget->has3DNowA()">; def Has3DNowA : Predicate<"Subtarget->has3DNowA()">;
def HasSSE1 : Predicate<"Subtarget->hasSSE1() && !Subtarget->hasAVX()">; def HasSSE1 : Predicate<"Subtarget->hasSSE1()">;
def HasSSE2 : Predicate<"Subtarget->hasSSE2() && !Subtarget->hasAVX()">; def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
def HasSSE3 : Predicate<"Subtarget->hasSSE3() && !Subtarget->hasAVX()">; def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
def HasSSSE3 : Predicate<"Subtarget->hasSSSE3() && !Subtarget->hasAVX()">; def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
def HasSSE41 : Predicate<"Subtarget->hasSSE41() && !Subtarget->hasAVX()">; def HasSSE41 : Predicate<"Subtarget->hasSSE41()">;
def HasSSE42 : Predicate<"Subtarget->hasSSE42() && !Subtarget->hasAVX()">; def HasSSE42 : Predicate<"Subtarget->hasSSE42()">;
def HasSSE4A : Predicate<"Subtarget->hasSSE4A() && !Subtarget->hasAVX()">; def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
def HasAVX : Predicate<"Subtarget->hasAVX()">; def HasAVX : Predicate<"Subtarget->hasAVX()">;
def HasXMMInt : Predicate<"Subtarget->hasXMMInt()">;
def HasAES : Predicate<"Subtarget->hasAES()">;
def HasCLMUL : Predicate<"Subtarget->hasCLMUL()">; def HasCLMUL : Predicate<"Subtarget->hasCLMUL()">;
def HasFMA3 : Predicate<"Subtarget->hasFMA3()">; def HasFMA3 : Predicate<"Subtarget->hasFMA3()">;
def HasFMA4 : Predicate<"Subtarget->hasFMA4()">; def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">; def FPStackf32 : Predicate<"!Subtarget->hasXMM()">;
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">; def FPStackf64 : Predicate<"!Subtarget->hasXMMInt()">;
def In32BitMode : Predicate<"!Subtarget->is64Bit()">, AssemblerPredicate; def In32BitMode : Predicate<"!Subtarget->is64Bit()">, AssemblerPredicate;
def In64BitMode : Predicate<"Subtarget->is64Bit()">, AssemblerPredicate; def In64BitMode : Predicate<"Subtarget->is64Bit()">, AssemblerPredicate;
def IsWin64 : Predicate<"Subtarget->isTargetWin64()">; def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
@ -436,7 +436,6 @@ def OptForSize : Predicate<"OptForSize">;
def OptForSpeed : Predicate<"!OptForSize">; def OptForSpeed : Predicate<"!OptForSize">;
def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">; def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">; def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
def HasAES : Predicate<"Subtarget->hasAES()">;
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions. // X86 Instruction Format Definitions.

View File

@ -712,6 +712,8 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V; []>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V;
} }
def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>;
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src), def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}", "cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (fround FR64:$src))]>; [(set FR32:$dst, (fround FR64:$src))]>;
@ -739,6 +741,8 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, XS, VEX_4V, Requires<[HasAVX, OptForSize]>; []>, XS, VEX_4V, Requires<[HasAVX, OptForSize]>;
} }
def : Pat<(f64 (fextend FR32:$src)), (VCVTSS2SDrr FR32:$src, FR32:$src)>;
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}", "cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (fextend FR32:$src))]>, XS, [(set FR64:$dst, (fextend FR32:$src))]>, XS,
@ -3680,7 +3684,7 @@ let Predicates = [HasSSE2] in
(CVTSS2SDrm addr:$src)>; (CVTSS2SDrm addr:$src)>;
// bit_convert // bit_convert
let Predicates = [HasSSE2] in { let Predicates = [HasXMMInt] in {
def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>; def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>; def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>; def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
@ -3713,6 +3717,10 @@ let Predicates = [HasSSE2] in {
def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>; def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
} }
let Predicates = [HasAVX] in {
def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
}
// Move scalar to XMM zero-extended // Move scalar to XMM zero-extended
// movd to XMM register zero-extends // movd to XMM register zero-extends
let AddedComplexity = 15 in { let AddedComplexity = 15 in {

View File

@ -256,13 +256,13 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
if ((ECX >> 9) & 1) X86SSELevel = SSSE3; if ((ECX >> 9) & 1) X86SSELevel = SSSE3;
if ((ECX >> 19) & 1) X86SSELevel = SSE41; if ((ECX >> 19) & 1) X86SSELevel = SSE41;
if ((ECX >> 20) & 1) X86SSELevel = SSE42; if ((ECX >> 20) & 1) X86SSELevel = SSE42;
if ((ECX >> 28) & 1) { HasAVX = true; X86SSELevel = NoMMXSSE; }
bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0; bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0; bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
HasCLMUL = IsIntel && ((ECX >> 1) & 0x1); HasCLMUL = IsIntel && ((ECX >> 1) & 0x1);
HasFMA3 = IsIntel && ((ECX >> 12) & 0x1); HasFMA3 = IsIntel && ((ECX >> 12) & 0x1);
HasAVX = ((ECX >> 28) & 0x1);
HasAES = IsIntel && ((ECX >> 25) & 0x1); HasAES = IsIntel && ((ECX >> 25) & 0x1);
if (IsIntel || IsAMD) { if (IsIntel || IsAMD) {
@ -316,11 +316,13 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
ParseSubtargetFeatures(FS, CPU); ParseSubtargetFeatures(FS, CPU);
// All X86-64 CPUs also have SSE2, however user might request no SSE via // All X86-64 CPUs also have SSE2, however user might request no SSE via
// -mattr, so don't force SSELevel here. // -mattr, so don't force SSELevel here.
if (HasAVX)
X86SSELevel = NoMMXSSE;
} else { } else {
// Otherwise, use CPUID to auto-detect feature set. // Otherwise, use CPUID to auto-detect feature set.
AutoDetectSubtargetFeatures(); AutoDetectSubtargetFeatures();
// Make sure SSE2 is enabled; it is available on all X86-64 CPUs. // Make sure SSE2 is enabled; it is available on all X86-64 CPUs.
if (Is64Bit && X86SSELevel < SSE2) if (Is64Bit && !HasAVX && X86SSELevel < SSE2)
X86SSELevel = SSE2; X86SSELevel = SSE2;
} }

View File

@ -155,6 +155,8 @@ public:
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
bool hasPOPCNT() const { return HasPOPCNT; } bool hasPOPCNT() const { return HasPOPCNT; }
bool hasAVX() const { return HasAVX; } bool hasAVX() const { return HasAVX; }
bool hasXMM() const { return hasSSE1() || hasAVX(); }
bool hasXMMInt() const { return hasSSE2() || hasAVX(); }
bool hasAES() const { return HasAES; } bool hasAES() const { return HasAES; }
bool hasCLMUL() const { return HasCLMUL; } bool hasCLMUL() const { return HasCLMUL; }
bool hasFMA3() const { return HasFMA3; } bool hasFMA3() const { return HasFMA3; }

View File

@ -4,7 +4,7 @@
define void @zero() nounwind ssp { define void @zero() nounwind ssp {
entry: entry:
; CHECK: vpxor ; CHECK: vxorps
; CHECK: vmovaps ; CHECK: vmovaps
store <4 x float> zeroinitializer, <4 x float>* @z, align 16 store <4 x float> zeroinitializer, <4 x float>* @z, align 16
ret void ret void