Fixed x86 code generation of multiple for v2i64. It was incorrect for SSE4.1.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61211 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Mon P Wang 2008-12-18 21:42:19 +00:00
parent 1d045486ca
commit af9b952627
4 changed files with 49 additions and 6 deletions

View File

@ -687,6 +687,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::ADD, MVT::v8i16, Legal);
setOperationAction(ISD::ADD, MVT::v4i32, Legal);
setOperationAction(ISD::ADD, MVT::v2i64, Legal);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
setOperationAction(ISD::SUB, MVT::v16i8, Legal);
setOperationAction(ISD::SUB, MVT::v8i16, Legal);
setOperationAction(ISD::SUB, MVT::v4i32, Legal);
@ -758,7 +759,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (Subtarget->hasSSE41()) {
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
setOperationAction(ISD::MUL, MVT::v2i64, Legal);
// i8 and i16 vectors are custom , because the source register and source
// source memory operand types are not the same width. f32 vectors are
@ -6136,6 +6136,50 @@ SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
return Op;
}
SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getValueType();
assert(VT == MVT::v2i64 && "Only know how to lower V2I64 multiply");
// ulong2 Ahi = __builtin_ia32_psrlqi128( a, 32);
// ulong2 Bhi = __builtin_ia32_psrlqi128( b, 32);
// ulong2 AloBlo = __builtin_ia32_pmuludq128( a, b );
// ulong2 AloBhi = __builtin_ia32_pmuludq128( a, Bhi );
// ulong2 AhiBlo = __builtin_ia32_pmuludq128( Ahi, b );
//
// AloBhi = __builtin_ia32_psllqi128( AloBhi, 32 );
// AhiBlo = __builtin_ia32_psllqi128( AhiBlo, 32 );
// return AloBlo + AloBhi + AhiBlo;
SDValue A = Op.getOperand(0);
SDValue B = Op.getOperand(1);
SDValue Ahi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
A, DAG.getConstant(32, MVT::i32));
SDValue Bhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
B, DAG.getConstant(32, MVT::i32));
SDValue AloBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
A, B);
SDValue AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
A, Bhi);
SDValue AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
Ahi, B);
AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
AloBhi, DAG.getConstant(32, MVT::i32));
AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
AhiBlo, DAG.getConstant(32, MVT::i32));
SDValue Res = DAG.getNode(ISD::ADD, VT, AloBlo, AloBhi);
Res = DAG.getNode(ISD::ADD, VT, Res, AhiBlo);
return Res;
}
SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) {
// Lower the "add/sub/mul with overflow" instruction into a regular ins plus
// a "setcc" instruction that checks the overflow flag. The "brcond" lowering
@ -6305,6 +6349,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
case ISD::CTLZ: return LowerCTLZ(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op, DAG);
case ISD::MUL: return LowerMUL_V2I64(Op, DAG);
case ISD::SADDO:
case ISD::UADDO:
case ISD::SSUBO:

View File

@ -597,6 +597,7 @@ namespace llvm {
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG);
SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG);
SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG);
SDValue LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG);
SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG);
SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG);

View File

@ -581,7 +581,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::PMINSWrr, X86::PMINSWrm },
{ X86::PMINUBrr, X86::PMINUBrm },
{ X86::PMULDQrr, X86::PMULDQrm },
{ X86::PMULDQrr_int, X86::PMULDQrm_int },
{ X86::PMULHUWrr, X86::PMULHUWrm },
{ X86::PMULHWrr, X86::PMULHWrm },
{ X86::PMULLDrr, X86::PMULLDrm },

View File

@ -3313,12 +3313,13 @@ defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud",
defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw",
int_x86_sse41_pmaxuw, 1>;
defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq, 1>;
def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
(PCMPEQQrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
(PCMPEQQrm VR128:$src1, addr:$src2)>;
/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
let Constraints = "$src1 = $dst" in {
multiclass SS41I_binop_patint<bits<8> opc, string OpcodeStr, ValueType OpVT,
@ -3353,9 +3354,6 @@ let Constraints = "$src1 = $dst" in {
}
defm PMULLD : SS41I_binop_patint<0x40, "pmulld", v4i32, mul,
int_x86_sse41_pmulld, 1>;
defm PMULDQ : SS41I_binop_patint<0x28, "pmuldq", v2i64, mul,
int_x86_sse41_pmuldq, 1>;
/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
let Constraints = "$src1 = $dst" in {