mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-13 04:30:23 +00:00
Formalize the notion that AVX and SSE are non-overlapping extensions from the compiler's point of view. Per email discussion, we either want to always use VEX-prefixed instructions or never use them, and are taking "HasAVX" to mean "Always use VEX". Passing -mattr=-avx,+sse42 should serve to restore legacy SSE support when desirable.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@121439 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
1c952b9cc9
commit
2ea8ee7c76
@ -116,11 +116,11 @@ def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
|
|||||||
FeatureFastUAMem]>;
|
FeatureFastUAMem]>;
|
||||||
// Westmere is a similar machine to nehalem with some additional features.
|
// Westmere is a similar machine to nehalem with some additional features.
|
||||||
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
|
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
|
||||||
def : Proc<"westmere", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
|
def : Proc<"westmere", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
|
||||||
FeatureFastUAMem, FeatureAES]>;
|
FeatureFastUAMem, FeatureAES, FeatureCLMUL]>;
|
||||||
// Sandy Bridge does not have FMA
|
// SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
|
||||||
// FIXME: Wikipedia says it does... it should have AES as well.
|
// rather than a superset.
|
||||||
def : Proc<"sandybridge", [FeatureSSE42, FeatureAVX, Feature64Bit]>;
|
def : Proc<"sandybridge", [FeatureAVX, FeatureAES, FeatureCLMUL, Feature64Bit]>;
|
||||||
|
|
||||||
def : Proc<"k6", [FeatureMMX]>;
|
def : Proc<"k6", [FeatureMMX]>;
|
||||||
def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>;
|
def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>;
|
||||||
|
@ -61,7 +61,7 @@ def RetCC_X86_32_C : CallingConv<[
|
|||||||
// weirdly; this is really the sse-regparm calling convention) in which
|
// weirdly; this is really the sse-regparm calling convention) in which
|
||||||
// case they use XMM0, otherwise it is the same as the common X86 calling
|
// case they use XMM0, otherwise it is the same as the common X86 calling
|
||||||
// conv.
|
// conv.
|
||||||
CCIfInReg<CCIfSubtarget<"hasSSE2()",
|
CCIfInReg<CCIfSubtarget<"hasXMMInt()",
|
||||||
CCIfType<[f32, f64], CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
|
CCIfType<[f32, f64], CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
|
||||||
CCIfType<[f32,f64], CCAssignToReg<[ST0, ST1]>>,
|
CCIfType<[f32,f64], CCAssignToReg<[ST0, ST1]>>,
|
||||||
CCDelegateTo<RetCC_X86Common>
|
CCDelegateTo<RetCC_X86Common>
|
||||||
@ -73,8 +73,8 @@ def RetCC_X86_32_Fast : CallingConv<[
|
|||||||
// SSE2.
|
// SSE2.
|
||||||
// This can happen when a float, 2 x float, or 3 x float vector is split by
|
// This can happen when a float, 2 x float, or 3 x float vector is split by
|
||||||
// target lowering, and is returned in 1-3 sse regs.
|
// target lowering, and is returned in 1-3 sse regs.
|
||||||
CCIfType<[f32], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
|
CCIfType<[f32], CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
|
||||||
CCIfType<[f64], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
|
CCIfType<[f64], CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
|
||||||
|
|
||||||
// For integers, ECX can be used as an extra return register
|
// For integers, ECX can be used as an extra return register
|
||||||
CCIfType<[i8], CCAssignToReg<[AL, DL, CL]>>,
|
CCIfType<[i8], CCAssignToReg<[AL, DL, CL]>>,
|
||||||
@ -163,12 +163,12 @@ def CC_X86_64_C : CallingConv<[
|
|||||||
// registers on Darwin.
|
// registers on Darwin.
|
||||||
CCIfType<[x86mmx],
|
CCIfType<[x86mmx],
|
||||||
CCIfSubtarget<"isTargetDarwin()",
|
CCIfSubtarget<"isTargetDarwin()",
|
||||||
CCIfSubtarget<"hasSSE2()",
|
CCIfSubtarget<"hasXMMInt()",
|
||||||
CCPromoteToType<v2i64>>>>,
|
CCPromoteToType<v2i64>>>>,
|
||||||
|
|
||||||
// The first 8 FP/Vector arguments are passed in XMM registers.
|
// The first 8 FP/Vector arguments are passed in XMM registers.
|
||||||
CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
|
CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
|
||||||
CCIfSubtarget<"hasSSE1()",
|
CCIfSubtarget<"hasXMM()",
|
||||||
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
|
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
|
||||||
|
|
||||||
// The first 8 256-bit vector arguments are passed in YMM registers.
|
// The first 8 256-bit vector arguments are passed in YMM registers.
|
||||||
@ -245,7 +245,7 @@ def CC_X86_64_GHC : CallingConv<[
|
|||||||
|
|
||||||
// Pass in STG registers: F1, F2, F3, F4, D1, D2
|
// Pass in STG registers: F1, F2, F3, F4, D1, D2
|
||||||
CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
|
CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
|
||||||
CCIfSubtarget<"hasSSE1()",
|
CCIfSubtarget<"hasXMM()",
|
||||||
CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>>
|
CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>>
|
||||||
]>;
|
]>;
|
||||||
|
|
||||||
@ -263,7 +263,7 @@ def CC_X86_32_Common : CallingConv<[
|
|||||||
// The first 3 float or double arguments, if marked 'inreg' and if the call
|
// The first 3 float or double arguments, if marked 'inreg' and if the call
|
||||||
// is not a vararg call and if SSE2 is available, are passed in SSE registers.
|
// is not a vararg call and if SSE2 is available, are passed in SSE registers.
|
||||||
CCIfNotVarArg<CCIfInReg<CCIfType<[f32,f64],
|
CCIfNotVarArg<CCIfInReg<CCIfType<[f32,f64],
|
||||||
CCIfSubtarget<"hasSSE2()",
|
CCIfSubtarget<"hasXMMInt()",
|
||||||
CCAssignToReg<[XMM0,XMM1,XMM2]>>>>>,
|
CCAssignToReg<[XMM0,XMM1,XMM2]>>>>>,
|
||||||
|
|
||||||
// The first 3 __m64 (except for v1i64) vector arguments are passed in mmx
|
// The first 3 __m64 (except for v1i64) vector arguments are passed in mmx
|
||||||
@ -362,7 +362,7 @@ def CC_X86_32_FastCC : CallingConv<[
|
|||||||
// The first 3 float or double arguments, if the call is not a vararg
|
// The first 3 float or double arguments, if the call is not a vararg
|
||||||
// call and if SSE2 is available, are passed in SSE registers.
|
// call and if SSE2 is available, are passed in SSE registers.
|
||||||
CCIfNotVarArg<CCIfType<[f32,f64],
|
CCIfNotVarArg<CCIfType<[f32,f64],
|
||||||
CCIfSubtarget<"hasSSE2()",
|
CCIfSubtarget<"hasXMMInt()",
|
||||||
CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
|
CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
|
||||||
|
|
||||||
// Doubles get 8-byte slots that are 8-byte aligned.
|
// Doubles get 8-byte slots that are 8-byte aligned.
|
||||||
|
@ -81,8 +81,8 @@ static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
|
|||||||
X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||||
: TargetLowering(TM, createTLOF(TM)) {
|
: TargetLowering(TM, createTLOF(TM)) {
|
||||||
Subtarget = &TM.getSubtarget<X86Subtarget>();
|
Subtarget = &TM.getSubtarget<X86Subtarget>();
|
||||||
X86ScalarSSEf64 = Subtarget->hasSSE2();
|
X86ScalarSSEf64 = Subtarget->hasXMMInt();
|
||||||
X86ScalarSSEf32 = Subtarget->hasSSE1();
|
X86ScalarSSEf32 = Subtarget->hasXMM();
|
||||||
X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
|
X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
|
||||||
|
|
||||||
RegInfo = TM.getRegisterInfo();
|
RegInfo = TM.getRegisterInfo();
|
||||||
@ -356,7 +356,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
|||||||
setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom);
|
setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Subtarget->hasSSE1())
|
if (Subtarget->hasXMM())
|
||||||
setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
|
setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
|
||||||
|
|
||||||
// We may not have a libcall for MEMBARRIER so we should lower this.
|
// We may not have a libcall for MEMBARRIER so we should lower this.
|
||||||
@ -664,7 +664,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
|||||||
setOperationAction(ISD::BITCAST, MVT::v2i32, Expand);
|
setOperationAction(ISD::BITCAST, MVT::v2i32, Expand);
|
||||||
setOperationAction(ISD::BITCAST, MVT::v1i64, Expand);
|
setOperationAction(ISD::BITCAST, MVT::v1i64, Expand);
|
||||||
|
|
||||||
if (!UseSoftFloat && Subtarget->hasSSE1()) {
|
if (!UseSoftFloat && Subtarget->hasXMM()) {
|
||||||
addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
|
addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
|
||||||
|
|
||||||
setOperationAction(ISD::FADD, MVT::v4f32, Legal);
|
setOperationAction(ISD::FADD, MVT::v4f32, Legal);
|
||||||
@ -681,7 +681,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
|||||||
setOperationAction(ISD::VSETCC, MVT::v4f32, Custom);
|
setOperationAction(ISD::VSETCC, MVT::v4f32, Custom);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!UseSoftFloat && Subtarget->hasSSE2()) {
|
if (!UseSoftFloat && Subtarget->hasXMMInt()) {
|
||||||
addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
|
addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
|
||||||
|
|
||||||
// FIXME: Unfortunately -soft-float and -no-implicit-float means XMM
|
// FIXME: Unfortunately -soft-float and -no-implicit-float means XMM
|
||||||
@ -1043,7 +1043,7 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
unsigned Align = 4;
|
unsigned Align = 4;
|
||||||
if (Subtarget->hasSSE1())
|
if (Subtarget->hasXMM())
|
||||||
getMaxByValAlign(Ty, Align);
|
getMaxByValAlign(Ty, Align);
|
||||||
return Align;
|
return Align;
|
||||||
}
|
}
|
||||||
@ -1084,7 +1084,7 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
|
|||||||
} else if (!MemcpyStrSrc && Size >= 8 &&
|
} else if (!MemcpyStrSrc && Size >= 8 &&
|
||||||
!Subtarget->is64Bit() &&
|
!Subtarget->is64Bit() &&
|
||||||
Subtarget->getStackAlignment() >= 8 &&
|
Subtarget->getStackAlignment() >= 8 &&
|
||||||
Subtarget->hasSSE2()) {
|
Subtarget->hasXMMInt()) {
|
||||||
// Do not use f64 to lower memcpy if source is string constant. It's
|
// Do not use f64 to lower memcpy if source is string constant. It's
|
||||||
// better to use i32 to avoid the loads.
|
// better to use i32 to avoid the loads.
|
||||||
return MVT::f64;
|
return MVT::f64;
|
||||||
@ -1272,14 +1272,14 @@ X86TargetLowering::LowerReturn(SDValue Chain,
|
|||||||
// or SSE or MMX vectors.
|
// or SSE or MMX vectors.
|
||||||
if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
|
if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
|
||||||
VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
|
VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
|
||||||
(Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
|
(Subtarget->is64Bit() && !Subtarget->hasXMM())) {
|
||||||
report_fatal_error("SSE register return with SSE disabled");
|
report_fatal_error("SSE register return with SSE disabled");
|
||||||
}
|
}
|
||||||
// Likewise we can't return F64 values with SSE1 only. gcc does so, but
|
// Likewise we can't return F64 values with SSE1 only. gcc does so, but
|
||||||
// llvm-gcc has never done it right and no one has noticed, so this
|
// llvm-gcc has never done it right and no one has noticed, so this
|
||||||
// should be OK for now.
|
// should be OK for now.
|
||||||
if (ValVT == MVT::f64 &&
|
if (ValVT == MVT::f64 &&
|
||||||
(Subtarget->is64Bit() && !Subtarget->hasSSE2()))
|
(Subtarget->is64Bit() && !Subtarget->hasXMMInt()))
|
||||||
report_fatal_error("SSE2 register return with SSE2 disabled");
|
report_fatal_error("SSE2 register return with SSE2 disabled");
|
||||||
|
|
||||||
// Returns in ST0/ST1 are handled specially: these are pushed as operands to
|
// Returns in ST0/ST1 are handled specially: these are pushed as operands to
|
||||||
@ -1391,7 +1391,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
|
|||||||
|
|
||||||
// If this is x86-64, and we disabled SSE, we can't return FP values
|
// If this is x86-64, and we disabled SSE, we can't return FP values
|
||||||
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
|
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
|
||||||
((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
|
((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasXMM())) {
|
||||||
report_fatal_error("SSE register return with SSE disabled");
|
report_fatal_error("SSE register return with SSE disabled");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1700,11 +1700,11 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
|
|||||||
TotalNumIntRegs);
|
TotalNumIntRegs);
|
||||||
|
|
||||||
bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
|
bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
|
||||||
assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
|
assert(!(NumXMMRegs && !Subtarget->hasXMM()) &&
|
||||||
"SSE register cannot be used when SSE is disabled!");
|
"SSE register cannot be used when SSE is disabled!");
|
||||||
assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloatOps) &&
|
assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloatOps) &&
|
||||||
"SSE register cannot be used when SSE is disabled!");
|
"SSE register cannot be used when SSE is disabled!");
|
||||||
if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasSSE1())
|
if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasXMM())
|
||||||
// Kernel mode asks for SSE to be disabled, so don't push them
|
// Kernel mode asks for SSE to be disabled, so don't push them
|
||||||
// on the stack.
|
// on the stack.
|
||||||
TotalNumXMMRegs = 0;
|
TotalNumXMMRegs = 0;
|
||||||
@ -2055,7 +2055,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
|
|||||||
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
|
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
|
||||||
};
|
};
|
||||||
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
|
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
|
||||||
assert((Subtarget->hasSSE1() || !NumXMMRegs)
|
assert((Subtarget->hasXMM() || !NumXMMRegs)
|
||||||
&& "SSE registers cannot be used when SSE is disabled");
|
&& "SSE registers cannot be used when SSE is disabled");
|
||||||
|
|
||||||
Chain = DAG.getCopyToReg(Chain, dl, X86::AL,
|
Chain = DAG.getCopyToReg(Chain, dl, X86::AL,
|
||||||
@ -7635,7 +7635,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
assert(!UseSoftFloat &&
|
assert(!UseSoftFloat &&
|
||||||
!(DAG.getMachineFunction()
|
!(DAG.getMachineFunction()
|
||||||
.getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) &&
|
.getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) &&
|
||||||
Subtarget->hasSSE1());
|
Subtarget->hasXMM());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Insert VAARG_64 node into the DAG
|
// Insert VAARG_64 node into the DAG
|
||||||
@ -11689,7 +11689,7 @@ TargetLowering::ConstraintWeight
|
|||||||
break;
|
break;
|
||||||
case 'x':
|
case 'x':
|
||||||
case 'Y':
|
case 'Y':
|
||||||
if ((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasSSE1())
|
if ((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasXMM())
|
||||||
weight = CW_Register;
|
weight = CW_Register;
|
||||||
break;
|
break;
|
||||||
case 'I':
|
case 'I':
|
||||||
@ -11759,9 +11759,9 @@ LowerXConstraint(EVT ConstraintVT) const {
|
|||||||
// FP X constraints get lowered to SSE1/2 registers if available, otherwise
|
// FP X constraints get lowered to SSE1/2 registers if available, otherwise
|
||||||
// 'f' like normal targets.
|
// 'f' like normal targets.
|
||||||
if (ConstraintVT.isFloatingPoint()) {
|
if (ConstraintVT.isFloatingPoint()) {
|
||||||
if (Subtarget->hasSSE2())
|
if (Subtarget->hasXMMInt())
|
||||||
return "Y";
|
return "Y";
|
||||||
if (Subtarget->hasSSE1())
|
if (Subtarget->hasXMM())
|
||||||
return "x";
|
return "x";
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -11991,10 +11991,10 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
|
|||||||
if (!Subtarget->hasMMX()) break;
|
if (!Subtarget->hasMMX()) break;
|
||||||
return std::make_pair(0U, X86::VR64RegisterClass);
|
return std::make_pair(0U, X86::VR64RegisterClass);
|
||||||
case 'Y': // SSE_REGS if SSE2 allowed
|
case 'Y': // SSE_REGS if SSE2 allowed
|
||||||
if (!Subtarget->hasSSE2()) break;
|
if (!Subtarget->hasXMMInt()) break;
|
||||||
// FALL THROUGH.
|
// FALL THROUGH.
|
||||||
case 'x': // SSE_REGS if SSE1 allowed
|
case 'x': // SSE_REGS if SSE1 allowed
|
||||||
if (!Subtarget->hasSSE1()) break;
|
if (!Subtarget->hasXMM()) break;
|
||||||
|
|
||||||
switch (VT.getSimpleVT().SimpleTy) {
|
switch (VT.getSimpleVT().SimpleTy) {
|
||||||
default: break;
|
default: break;
|
||||||
|
@ -400,26 +400,26 @@ def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
|
|||||||
def HasCMov : Predicate<"Subtarget->hasCMov()">;
|
def HasCMov : Predicate<"Subtarget->hasCMov()">;
|
||||||
def NoCMov : Predicate<"!Subtarget->hasCMov()">;
|
def NoCMov : Predicate<"!Subtarget->hasCMov()">;
|
||||||
|
|
||||||
// FIXME: temporary hack to let codegen assert or generate poor code in case
|
def HasMMX : Predicate<"Subtarget->hasMMX()">;
|
||||||
// no AVX version of the desired instructions is present, this is better for
|
|
||||||
// incremental dev (without fallbacks it's easier to spot what's missing)
|
|
||||||
def HasMMX : Predicate<"Subtarget->hasMMX() && !Subtarget->hasAVX()">;
|
|
||||||
def Has3DNow : Predicate<"Subtarget->has3DNow()">;
|
def Has3DNow : Predicate<"Subtarget->has3DNow()">;
|
||||||
def Has3DNowA : Predicate<"Subtarget->has3DNowA()">;
|
def Has3DNowA : Predicate<"Subtarget->has3DNowA()">;
|
||||||
def HasSSE1 : Predicate<"Subtarget->hasSSE1() && !Subtarget->hasAVX()">;
|
def HasSSE1 : Predicate<"Subtarget->hasSSE1()">;
|
||||||
def HasSSE2 : Predicate<"Subtarget->hasSSE2() && !Subtarget->hasAVX()">;
|
def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
|
||||||
def HasSSE3 : Predicate<"Subtarget->hasSSE3() && !Subtarget->hasAVX()">;
|
def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
|
||||||
def HasSSSE3 : Predicate<"Subtarget->hasSSSE3() && !Subtarget->hasAVX()">;
|
def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
|
||||||
def HasSSE41 : Predicate<"Subtarget->hasSSE41() && !Subtarget->hasAVX()">;
|
def HasSSE41 : Predicate<"Subtarget->hasSSE41()">;
|
||||||
def HasSSE42 : Predicate<"Subtarget->hasSSE42() && !Subtarget->hasAVX()">;
|
def HasSSE42 : Predicate<"Subtarget->hasSSE42()">;
|
||||||
def HasSSE4A : Predicate<"Subtarget->hasSSE4A() && !Subtarget->hasAVX()">;
|
def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
|
||||||
|
|
||||||
def HasAVX : Predicate<"Subtarget->hasAVX()">;
|
def HasAVX : Predicate<"Subtarget->hasAVX()">;
|
||||||
|
def HasXMMInt : Predicate<"Subtarget->hasXMMInt()">;
|
||||||
|
|
||||||
|
def HasAES : Predicate<"Subtarget->hasAES()">;
|
||||||
def HasCLMUL : Predicate<"Subtarget->hasCLMUL()">;
|
def HasCLMUL : Predicate<"Subtarget->hasCLMUL()">;
|
||||||
def HasFMA3 : Predicate<"Subtarget->hasFMA3()">;
|
def HasFMA3 : Predicate<"Subtarget->hasFMA3()">;
|
||||||
def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
|
def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
|
||||||
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
|
def FPStackf32 : Predicate<"!Subtarget->hasXMM()">;
|
||||||
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
|
def FPStackf64 : Predicate<"!Subtarget->hasXMMInt()">;
|
||||||
def In32BitMode : Predicate<"!Subtarget->is64Bit()">, AssemblerPredicate;
|
def In32BitMode : Predicate<"!Subtarget->is64Bit()">, AssemblerPredicate;
|
||||||
def In64BitMode : Predicate<"Subtarget->is64Bit()">, AssemblerPredicate;
|
def In64BitMode : Predicate<"Subtarget->is64Bit()">, AssemblerPredicate;
|
||||||
def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
|
def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
|
||||||
@ -436,7 +436,6 @@ def OptForSize : Predicate<"OptForSize">;
|
|||||||
def OptForSpeed : Predicate<"!OptForSize">;
|
def OptForSpeed : Predicate<"!OptForSize">;
|
||||||
def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
|
def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
|
||||||
def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
|
def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
|
||||||
def HasAES : Predicate<"Subtarget->hasAES()">;
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// X86 Instruction Format Definitions.
|
// X86 Instruction Format Definitions.
|
||||||
|
@ -712,6 +712,8 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
|
|||||||
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||||
[]>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V;
|
[]>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V;
|
||||||
}
|
}
|
||||||
|
def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>;
|
||||||
|
|
||||||
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
|
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
|
||||||
"cvtsd2ss\t{$src, $dst|$dst, $src}",
|
"cvtsd2ss\t{$src, $dst|$dst, $src}",
|
||||||
[(set FR32:$dst, (fround FR64:$src))]>;
|
[(set FR32:$dst, (fround FR64:$src))]>;
|
||||||
@ -739,6 +741,8 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
|
|||||||
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||||
[]>, XS, VEX_4V, Requires<[HasAVX, OptForSize]>;
|
[]>, XS, VEX_4V, Requires<[HasAVX, OptForSize]>;
|
||||||
}
|
}
|
||||||
|
def : Pat<(f64 (fextend FR32:$src)), (VCVTSS2SDrr FR32:$src, FR32:$src)>;
|
||||||
|
|
||||||
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
|
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
|
||||||
"cvtss2sd\t{$src, $dst|$dst, $src}",
|
"cvtss2sd\t{$src, $dst|$dst, $src}",
|
||||||
[(set FR64:$dst, (fextend FR32:$src))]>, XS,
|
[(set FR64:$dst, (fextend FR32:$src))]>, XS,
|
||||||
@ -3680,7 +3684,7 @@ let Predicates = [HasSSE2] in
|
|||||||
(CVTSS2SDrm addr:$src)>;
|
(CVTSS2SDrm addr:$src)>;
|
||||||
|
|
||||||
// bit_convert
|
// bit_convert
|
||||||
let Predicates = [HasSSE2] in {
|
let Predicates = [HasXMMInt] in {
|
||||||
def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
|
def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
|
||||||
def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
|
def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
|
||||||
def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
|
def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
|
||||||
@ -3713,6 +3717,10 @@ let Predicates = [HasSSE2] in {
|
|||||||
def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
|
def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let Predicates = [HasAVX] in {
|
||||||
|
def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
|
||||||
|
}
|
||||||
|
|
||||||
// Move scalar to XMM zero-extended
|
// Move scalar to XMM zero-extended
|
||||||
// movd to XMM register zero-extends
|
// movd to XMM register zero-extends
|
||||||
let AddedComplexity = 15 in {
|
let AddedComplexity = 15 in {
|
||||||
|
@ -256,13 +256,13 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
|
|||||||
if ((ECX >> 9) & 1) X86SSELevel = SSSE3;
|
if ((ECX >> 9) & 1) X86SSELevel = SSSE3;
|
||||||
if ((ECX >> 19) & 1) X86SSELevel = SSE41;
|
if ((ECX >> 19) & 1) X86SSELevel = SSE41;
|
||||||
if ((ECX >> 20) & 1) X86SSELevel = SSE42;
|
if ((ECX >> 20) & 1) X86SSELevel = SSE42;
|
||||||
|
if ((ECX >> 28) & 1) { HasAVX = true; X86SSELevel = NoMMXSSE; }
|
||||||
|
|
||||||
bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
|
bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
|
||||||
bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
|
bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
|
||||||
|
|
||||||
HasCLMUL = IsIntel && ((ECX >> 1) & 0x1);
|
HasCLMUL = IsIntel && ((ECX >> 1) & 0x1);
|
||||||
HasFMA3 = IsIntel && ((ECX >> 12) & 0x1);
|
HasFMA3 = IsIntel && ((ECX >> 12) & 0x1);
|
||||||
HasAVX = ((ECX >> 28) & 0x1);
|
|
||||||
HasAES = IsIntel && ((ECX >> 25) & 0x1);
|
HasAES = IsIntel && ((ECX >> 25) & 0x1);
|
||||||
|
|
||||||
if (IsIntel || IsAMD) {
|
if (IsIntel || IsAMD) {
|
||||||
@ -316,11 +316,13 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
|
|||||||
ParseSubtargetFeatures(FS, CPU);
|
ParseSubtargetFeatures(FS, CPU);
|
||||||
// All X86-64 CPUs also have SSE2, however user might request no SSE via
|
// All X86-64 CPUs also have SSE2, however user might request no SSE via
|
||||||
// -mattr, so don't force SSELevel here.
|
// -mattr, so don't force SSELevel here.
|
||||||
|
if (HasAVX)
|
||||||
|
X86SSELevel = NoMMXSSE;
|
||||||
} else {
|
} else {
|
||||||
// Otherwise, use CPUID to auto-detect feature set.
|
// Otherwise, use CPUID to auto-detect feature set.
|
||||||
AutoDetectSubtargetFeatures();
|
AutoDetectSubtargetFeatures();
|
||||||
// Make sure SSE2 is enabled; it is available on all X86-64 CPUs.
|
// Make sure SSE2 is enabled; it is available on all X86-64 CPUs.
|
||||||
if (Is64Bit && X86SSELevel < SSE2)
|
if (Is64Bit && !HasAVX && X86SSELevel < SSE2)
|
||||||
X86SSELevel = SSE2;
|
X86SSELevel = SSE2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -155,6 +155,8 @@ public:
|
|||||||
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
|
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
|
||||||
bool hasPOPCNT() const { return HasPOPCNT; }
|
bool hasPOPCNT() const { return HasPOPCNT; }
|
||||||
bool hasAVX() const { return HasAVX; }
|
bool hasAVX() const { return HasAVX; }
|
||||||
|
bool hasXMM() const { return hasSSE1() || hasAVX(); }
|
||||||
|
bool hasXMMInt() const { return hasSSE2() || hasAVX(); }
|
||||||
bool hasAES() const { return HasAES; }
|
bool hasAES() const { return HasAES; }
|
||||||
bool hasCLMUL() const { return HasCLMUL; }
|
bool hasCLMUL() const { return HasCLMUL; }
|
||||||
bool hasFMA3() const { return HasFMA3; }
|
bool hasFMA3() const { return HasFMA3; }
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
|
|
||||||
define void @zero() nounwind ssp {
|
define void @zero() nounwind ssp {
|
||||||
entry:
|
entry:
|
||||||
; CHECK: vpxor
|
; CHECK: vxorps
|
||||||
; CHECK: vmovaps
|
; CHECK: vmovaps
|
||||||
store <4 x float> zeroinitializer, <4 x float>* @z, align 16
|
store <4 x float> zeroinitializer, <4 x float>* @z, align 16
|
||||||
ret void
|
ret void
|
||||||
|
Loading…
Reference in New Issue
Block a user