mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-06 23:32:27 +00:00
With SSE2, expand FCOPYSIGN to a series of SSE bitwise operations.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@32900 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
e4929dd0cc
commit
68c47cba35
@ -250,9 +250,6 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
|
||||
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
|
||||
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
|
||||
|
||||
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
|
||||
|
||||
if (X86ScalarSSE) {
|
||||
// Set up the FP register classes.
|
||||
addRegisterClass(MVT::f32, X86::FR32RegisterClass);
|
||||
@ -266,6 +263,10 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
|
||||
setOperationAction(ISD::FNEG , MVT::f64, Custom);
|
||||
setOperationAction(ISD::FNEG , MVT::f32, Custom);
|
||||
|
||||
// Use ANDPD and ORPD to simulate FCOPYSIGN.
|
||||
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
|
||||
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
|
||||
|
||||
// We don't support sin/cos/fmod
|
||||
setOperationAction(ISD::FSIN , MVT::f64, Expand);
|
||||
setOperationAction(ISD::FCOS , MVT::f64, Expand);
|
||||
@ -283,7 +284,9 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
|
||||
// Set up the FP register classes.
|
||||
addRegisterClass(MVT::f64, X86::RFPRegisterClass);
|
||||
|
||||
setOperationAction(ISD::UNDEF, MVT::f64, Expand);
|
||||
setOperationAction(ISD::UNDEF, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
|
||||
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
|
||||
|
||||
if (!UnsafeFPMath) {
|
||||
setOperationAction(ISD::FSIN , MVT::f64 , Expand);
|
||||
@ -4123,6 +4126,56 @@ SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) {
|
||||
return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
|
||||
}
|
||||
|
||||
/// getFPSignMask - Load a packed constant whose lowest element is a sign-bit
/// mask for the scalar FP type VT (MVT::f64, otherwise treated as MVT::f32)
/// and whose remaining elements are 0.0. When Invert is false the mask has
/// only the sign bit set; when Invert is true it has every bit except the
/// sign bit set. PtrVT is the target pointer type used to address the
/// constant pool entry.
static SDOperand getFPSignMask(SelectionDAG &DAG, MVT::ValueType VT,
                               MVT::ValueType PtrVT, bool Invert) {
  const Type *Ty = MVT::getTypeForValueType(VT);
  std::vector<Constant*> CV;
  if (VT == MVT::f64) {
    uint64_t Bits = 1ULL << 63;
    CV.push_back(ConstantFP::get(Ty, BitsToDouble(Invert ? ~Bits : Bits)));
    CV.push_back(ConstantFP::get(Ty, 0.0));
  } else {
    uint32_t Bits = 1U << 31;
    CV.push_back(ConstantFP::get(Ty, BitsToFloat(Invert ? ~Bits : Bits)));
    CV.push_back(ConstantFP::get(Ty, 0.0));
    CV.push_back(ConstantFP::get(Ty, 0.0));
    CV.push_back(ConstantFP::get(Ty, 0.0));
  }
  Constant *CS = ConstantStruct::get(CV);
  SDOperand CPIdx = DAG.getConstantPool(CS, PtrVT, 4);

  // The loaded value must carry the type of the operand it is combined with.
  std::vector<MVT::ValueType> Tys;
  Tys.push_back(VT);
  Tys.push_back(MVT::Other);
  SmallVector<SDOperand, 3> Ops;
  Ops.push_back(DAG.getEntryNode());
  Ops.push_back(CPIdx);
  Ops.push_back(DAG.getSrcValue(NULL));
  return DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
}

/// LowerFCOPYSIGN - Custom lowering of ISD::FCOPYSIGN using SSE bitwise
/// operations. Operand 0 supplies the magnitude and determines the result
/// type; operand 1 supplies the sign and may be of a different FP type
/// (f32 vs. f64). The result is
///   (Op0 & ~SignMask) | (Op1 & SignMask)
/// with the extracted sign bit moved to the sign position of the result type
/// when the two operand types differ.
SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Op0 = Op.getOperand(0);
  SDOperand Op1 = Op.getOperand(1);
  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType SrcVT = Op1.getValueType();

  // First get the sign bit of second operand. The mask is loaded as SrcVT
  // (not VT) so that both FAND operands have the same type.
  SDOperand SignMask = getFPSignMask(DAG, SrcVT, getPointerTy(), false);
  SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, SignMask);

  // Shift sign bit right or left if the two operands have different types.
  if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) {
    // Op0 is MVT::f32, Op1 is MVT::f64: move bit 63 down to bit 31.
    SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit);
    SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit,
                          DAG.getConstant(32, MVT::i32));
    SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit);
    SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit,
                          DAG.getConstant(0, getPointerTy()));
  } else if (MVT::getSizeInBits(SrcVT) < MVT::getSizeInBits(VT)) {
    // Op0 is MVT::f64, Op1 is MVT::f32: move bit 31 up to bit 63.
    SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, SignBit);
    SignBit = DAG.getNode(X86ISD::FSHL, MVT::v4f32, SignBit,
                          DAG.getConstant(32, MVT::i32));
    SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, SignBit);
    SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f64, SignBit,
                          DAG.getConstant(0, getPointerTy()));
  }

  // Clear the sign bit of the first operand. Without this a negative Op0
  // would keep its sign regardless of Op1, since OR can only set the bit.
  SDOperand MagMask = getFPSignMask(DAG, VT, getPointerTy(), true);
  SDOperand Mag = DAG.getNode(X86ISD::FAND, VT, Op0, MagMask);

  // Or the magnitude with the sign bit.
  return DAG.getNode(X86ISD::FOR, VT, Mag, SignBit);
}
|
||||
|
||||
SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG,
|
||||
SDOperand Chain) {
|
||||
assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
|
||||
@ -4955,6 +5008,7 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
|
||||
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
|
||||
case ISD::FABS: return LowerFABS(Op, DAG);
|
||||
case ISD::FNEG: return LowerFNEG(Op, DAG);
|
||||
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
|
||||
case ISD::SETCC: return LowerSETCC(Op, DAG, DAG.getEntryNode());
|
||||
case ISD::SELECT: return LowerSELECT(Op, DAG);
|
||||
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
|
||||
@ -4976,7 +5030,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
case X86ISD::SHLD: return "X86ISD::SHLD";
|
||||
case X86ISD::SHRD: return "X86ISD::SHRD";
|
||||
case X86ISD::FAND: return "X86ISD::FAND";
|
||||
case X86ISD::FOR: return "X86ISD::FOR";
|
||||
case X86ISD::FXOR: return "X86ISD::FXOR";
|
||||
case X86ISD::FSHL: return "X86ISD::FSHL";
|
||||
case X86ISD::FSRL: return "X86ISD::FSRL";
|
||||
case X86ISD::FILD: return "X86ISD::FILD";
|
||||
case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG";
|
||||
case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
|
||||
|
@ -35,10 +35,20 @@ namespace llvm {
|
||||
/// to X86::ANDPS or X86::ANDPD.
|
||||
FAND,
|
||||
|
||||
/// FOR - Bitwise logical OR of floating point values. This corresponds
|
||||
/// to X86::ORPS or X86::ORPD.
|
||||
FOR,
|
||||
|
||||
/// FXOR - Bitwise logical XOR of floating point values. This corresponds
|
||||
/// to X86::XORPS or X86::XORPD.
|
||||
FXOR,
|
||||
|
||||
/// FSHL, FSRL - Shift a floating point value (in SSE register) by n bits
|
||||
/// while shifting in 0's. These correspond to X86::PSLLDQ or
|
||||
/// X86::PSRLDQ.
|
||||
FSHL,
|
||||
FSRL,
|
||||
|
||||
/// FILD, FILD_FLAG - This instruction implements SINT_TO_FP with the
|
||||
/// integer source in memory and FP reg result. This corresponds to the
|
||||
/// X86::FILD*m instructions. It has three inputs (token chain, address,
|
||||
@ -389,6 +399,7 @@ namespace llvm {
|
||||
SDOperand LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG);
|
||||
SDOperand LowerFABS(SDOperand Op, SelectionDAG &DAG);
|
||||
SDOperand LowerFNEG(SDOperand Op, SelectionDAG &DAG);
|
||||
SDOperand LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG);
|
||||
SDOperand LowerSETCC(SDOperand Op, SelectionDAG &DAG, SDOperand Chain);
|
||||
SDOperand LowerSELECT(SDOperand Op, SelectionDAG &DAG);
|
||||
SDOperand LowerBRCOND(SDOperand Op, SelectionDAG &DAG);
|
||||
|
@ -18,14 +18,21 @@
|
||||
// SSE specific DAG Nodes.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Type profile used by the X86ISD::FSHL / X86ISD::FSRL nodes: one result and
// two operands. The result and operand 1 have the same floating point type;
// operand 2 (the shift amount) is an integer.
def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>,
|
||||
SDTCisFP<0>, SDTCisInt<2> ]>;
|
||||
|
||||
def X86loadp : SDNode<"X86ISD::LOAD_PACK", SDTLoad, [SDNPHasChain]>;
|
||||
def X86loadu : SDNode<"X86ISD::LOAD_UA", SDTLoad, [SDNPHasChain]>;
|
||||
def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
|
||||
def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
|
||||
def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
|
||||
[SDNPCommutative, SDNPAssociative]>;
|
||||
def X86for : SDNode<"X86ISD::FOR", SDTFPBinOp,
|
||||
[SDNPCommutative, SDNPAssociative]>;
|
||||
def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
|
||||
[SDNPCommutative, SDNPAssociative]>;
|
||||
// Left / right shift of a floating point value; both nodes use the
// SDTX86FPShiftOp type profile and carry no extra node properties.
def X86fshl : SDNode<"X86ISD::FSHL", SDTX86FPShiftOp>;
|
||||
def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;
|
||||
def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest,
|
||||
[SDNPHasChain, SDNPOutFlag]>;
|
||||
def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest,
|
||||
@ -607,9 +614,11 @@ def FsANDPDrr : PDI<0x54, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
|
||||
"andpd {$src2, $dst|$dst, $src2}",
|
||||
[(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>;
|
||||
def FsORPSrr : PSI<0x56, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
|
||||
"orps {$src2, $dst|$dst, $src2}", []>;
|
||||
"orps {$src2, $dst|$dst, $src2}",
|
||||
[(set FR32:$dst, (X86for FR32:$src1, FR32:$src2))]>;
|
||||
def FsORPDrr : PDI<0x56, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
|
||||
"orpd {$src2, $dst|$dst, $src2}", []>;
|
||||
"orpd {$src2, $dst|$dst, $src2}",
|
||||
[(set FR64:$dst, (X86for FR64:$src1, FR64:$src2))]>;
|
||||
def FsXORPSrr : PSI<0x57, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
|
||||
"xorps {$src2, $dst|$dst, $src2}",
|
||||
[(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>;
|
||||
@ -626,9 +635,13 @@ def FsANDPDrm : PDI<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
|
||||
[(set FR64:$dst, (X86fand FR64:$src1,
|
||||
(X86loadpf64 addr:$src2)))]>;
|
||||
def FsORPSrm : PSI<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
|
||||
"orps {$src2, $dst|$dst, $src2}", []>;
|
||||
"orps {$src2, $dst|$dst, $src2}",
|
||||
[(set FR32:$dst, (X86for FR32:$src1,
|
||||
(X86loadpf32 addr:$src2)))]>;
|
||||
def FsORPDrm : PDI<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
|
||||
"orpd {$src2, $dst|$dst, $src2}", []>;
|
||||
"orpd {$src2, $dst|$dst, $src2}",
|
||||
[(set FR64:$dst, (X86for FR64:$src1,
|
||||
(X86loadpf64 addr:$src2)))]>;
|
||||
def FsXORPSrm : PSI<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
|
||||
"xorps {$src2, $dst|$dst, $src2}",
|
||||
[(set FR32:$dst, (X86fxor FR32:$src1,
|
||||
@ -1364,6 +1377,10 @@ let Predicates = [HasSSE2] in {
|
||||
(v2i64 (PSLLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
|
||||
def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
|
||||
(v2i64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
|
||||
// Select the FP shift nodes to the whole-register shift instructions:
// X86fshl on v4f32 -> PSLLDQri, X86fsrl on v2f64 -> PSRLDQri. Only these two
// type/direction combinations are matched here.
// NOTE(review): the immediate is passed through PSxLDQ_imm, presumably to
// turn the node's shift amount into the form the instruction expects; its
// definition is not visible here -- confirm.
def : Pat<(v4f32 (X86fshl VR128:$src1, i32immSExt8:$src2)),
|
||||
(v4f32 (PSLLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
|
||||
def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
|
||||
(v2f64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
|
||||
}
|
||||
|
||||
// Logical
|
||||
|
Loading…
x
Reference in New Issue
Block a user