mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-06 09:44:39 +00:00
Fixed x86 code generation of multiply for v2i64. It was incorrect for SSE4.1.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61211 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
1d045486ca
commit
af9b952627
@ -687,6 +687,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||
setOperationAction(ISD::ADD, MVT::v8i16, Legal);
|
||||
setOperationAction(ISD::ADD, MVT::v4i32, Legal);
|
||||
setOperationAction(ISD::ADD, MVT::v2i64, Legal);
|
||||
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
|
||||
setOperationAction(ISD::SUB, MVT::v16i8, Legal);
|
||||
setOperationAction(ISD::SUB, MVT::v8i16, Legal);
|
||||
setOperationAction(ISD::SUB, MVT::v4i32, Legal);
|
||||
@ -758,7 +759,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||
if (Subtarget->hasSSE41()) {
|
||||
// FIXME: Do we need to handle scalar-to-vector here?
|
||||
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
|
||||
setOperationAction(ISD::MUL, MVT::v2i64, Legal);
|
||||
|
||||
// i8 and i16 vectors are custom, because the source register and
|
||||
// source memory operand types are not the same width. f32 vectors are
|
||||
@ -6136,6 +6136,50 @@ SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
|
||||
return Op;
|
||||
}
|
||||
|
||||
/// LowerMUL_V2I64 - Custom-lower a v2i64 ISD::MUL into a sequence of SSE2
/// intrinsic nodes plus two vector adds.
///
/// Each 64-bit lane is split into 32-bit halves and the product is rebuilt
/// from three partial products:
///
///   LhsHi  = psrlq(Lhs, 32)
///   RhsHi  = psrlq(Rhs, 32)
///   LoLo   = pmuludq(Lhs,   Rhs)
///   LoHi   = psllq(pmuludq(Lhs,   RhsHi), 32)
///   HiLo   = psllq(pmuludq(LhsHi, Rhs),   32)
///   result = LoLo + LoHi + HiLo
///
/// \param Op   the multiply node; its value type must be v2i64.
/// \param DAG  the SelectionDAG in which the replacement nodes are created.
/// \returns the value of the final ADD node.
SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  assert(VT == MVT::v2i64 && "Only know how to lower V2I64 multiply");

  SDValue Lhs = Op.getOperand(0);
  SDValue Rhs = Op.getOperand(1);

  // Bring the upper 32 bits of every lane down into the lower half.
  SDValue LhsHi =
      DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
                  DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
                  Lhs, DAG.getConstant(32, MVT::i32));
  SDValue RhsHi =
      DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
                  DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
                  Rhs, DAG.getConstant(32, MVT::i32));

  // The three partial products that contribute to the low 64 bits of the
  // result: low*low and the two cross terms.
  SDValue LoLo =
      DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
                  DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
                  Lhs, Rhs);
  SDValue LoHi =
      DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
                  DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
                  Lhs, RhsHi);
  SDValue HiLo =
      DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
                  DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
                  LhsHi, Rhs);

  // Move both cross terms back up into the high half of each lane.
  SDValue LoHiShifted =
      DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
                  DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
                  LoHi, DAG.getConstant(32, MVT::i32));
  SDValue HiLoShifted =
      DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
                  DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
                  HiLo, DAG.getConstant(32, MVT::i32));

  // Sum the low product with the two shifted cross terms.
  SDValue PartialSum = DAG.getNode(ISD::ADD, VT, LoLo, LoHiShifted);
  return DAG.getNode(ISD::ADD, VT, PartialSum, HiLoShifted);
}
|
||||
|
||||
|
||||
SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) {
|
||||
// Lower the "add/sub/mul with overflow" instruction into a regular ins plus
|
||||
// a "setcc" instruction that checks the overflow flag. The "brcond" lowering
|
||||
@ -6305,6 +6349,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
|
||||
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
|
||||
case ISD::CTLZ: return LowerCTLZ(Op, DAG);
|
||||
case ISD::CTTZ: return LowerCTTZ(Op, DAG);
|
||||
case ISD::MUL: return LowerMUL_V2I64(Op, DAG);
|
||||
case ISD::SADDO:
|
||||
case ISD::UADDO:
|
||||
case ISD::SSUBO:
|
||||
|
@ -597,6 +597,7 @@ namespace llvm {
|
||||
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG);
|
||||
SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG);
|
||||
SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG);
|
||||
SDValue LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG);
|
||||
SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG);
|
||||
|
||||
SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG);
|
||||
|
@ -581,7 +581,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
|
||||
{ X86::PMINSWrr, X86::PMINSWrm },
|
||||
{ X86::PMINUBrr, X86::PMINUBrm },
|
||||
{ X86::PMULDQrr, X86::PMULDQrm },
|
||||
{ X86::PMULDQrr_int, X86::PMULDQrm_int },
|
||||
{ X86::PMULHUWrr, X86::PMULHUWrm },
|
||||
{ X86::PMULHWrr, X86::PMULHWrm },
|
||||
{ X86::PMULLDrr, X86::PMULLDrm },
|
||||
|
@ -3313,12 +3313,13 @@ defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud",
|
||||
defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw",
|
||||
int_x86_sse41_pmaxuw, 1>;
|
||||
|
||||
defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq, 1>;
|
||||
|
||||
def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
|
||||
(PCMPEQQrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
|
||||
(PCMPEQQrm VR128:$src1, addr:$src2)>;
|
||||
|
||||
|
||||
/// SS41I_binop_patint - Simple SSE 4.1 binary operator
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
multiclass SS41I_binop_patint<bits<8> opc, string OpcodeStr, ValueType OpVT,
|
||||
@ -3353,9 +3354,6 @@ let Constraints = "$src1 = $dst" in {
|
||||
}
|
||||
defm PMULLD : SS41I_binop_patint<0x40, "pmulld", v4i32, mul,
|
||||
int_x86_sse41_pmulld, 1>;
|
||||
defm PMULDQ : SS41I_binop_patint<0x28, "pmuldq", v2i64, mul,
|
||||
int_x86_sse41_pmuldq, 1>;
|
||||
|
||||
|
||||
/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
|
Loading…
x
Reference in New Issue
Block a user