mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-16 14:31:59 +00:00
Add AVX2 instruction lowering for add, sub, and mul.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144174 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
89d093d5b6
commit
aaa643c70e
@ -1031,25 +1031,42 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||
setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
|
||||
setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
|
||||
|
||||
setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);
|
||||
setOperationAction(ISD::VSELECT, MVT::v4i64, Legal);
|
||||
setOperationAction(ISD::VSELECT, MVT::v8i32, Legal);
|
||||
setOperationAction(ISD::VSELECT, MVT::v8f32, Legal);
|
||||
setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);
|
||||
setOperationAction(ISD::VSELECT, MVT::v4i64, Legal);
|
||||
setOperationAction(ISD::VSELECT, MVT::v8i32, Legal);
|
||||
setOperationAction(ISD::VSELECT, MVT::v8f32, Legal);
|
||||
|
||||
setOperationAction(ISD::ADD, MVT::v4i64, Custom);
|
||||
setOperationAction(ISD::ADD, MVT::v8i32, Custom);
|
||||
setOperationAction(ISD::ADD, MVT::v16i16, Custom);
|
||||
setOperationAction(ISD::ADD, MVT::v32i8, Custom);
|
||||
if (Subtarget->hasAVX2()) {
|
||||
setOperationAction(ISD::ADD, MVT::v4i64, Legal);
|
||||
setOperationAction(ISD::ADD, MVT::v8i32, Legal);
|
||||
setOperationAction(ISD::ADD, MVT::v16i16, Legal);
|
||||
setOperationAction(ISD::ADD, MVT::v32i8, Legal);
|
||||
|
||||
setOperationAction(ISD::SUB, MVT::v4i64, Custom);
|
||||
setOperationAction(ISD::SUB, MVT::v8i32, Custom);
|
||||
setOperationAction(ISD::SUB, MVT::v16i16, Custom);
|
||||
setOperationAction(ISD::SUB, MVT::v32i8, Custom);
|
||||
setOperationAction(ISD::SUB, MVT::v4i64, Legal);
|
||||
setOperationAction(ISD::SUB, MVT::v8i32, Legal);
|
||||
setOperationAction(ISD::SUB, MVT::v16i16, Legal);
|
||||
setOperationAction(ISD::SUB, MVT::v32i8, Legal);
|
||||
|
||||
setOperationAction(ISD::MUL, MVT::v4i64, Custom);
|
||||
setOperationAction(ISD::MUL, MVT::v8i32, Custom);
|
||||
setOperationAction(ISD::MUL, MVT::v16i16, Custom);
|
||||
// Don't lower v32i8 because there is no 128-bit byte mul
|
||||
setOperationAction(ISD::MUL, MVT::v4i64, Custom);
|
||||
setOperationAction(ISD::MUL, MVT::v8i32, Legal);
|
||||
setOperationAction(ISD::MUL, MVT::v16i16, Legal);
|
||||
// Don't lower v32i8 because there is no 128-bit byte mul
|
||||
} else {
|
||||
setOperationAction(ISD::ADD, MVT::v4i64, Custom);
|
||||
setOperationAction(ISD::ADD, MVT::v8i32, Custom);
|
||||
setOperationAction(ISD::ADD, MVT::v16i16, Custom);
|
||||
setOperationAction(ISD::ADD, MVT::v32i8, Custom);
|
||||
|
||||
setOperationAction(ISD::SUB, MVT::v4i64, Custom);
|
||||
setOperationAction(ISD::SUB, MVT::v8i32, Custom);
|
||||
setOperationAction(ISD::SUB, MVT::v16i16, Custom);
|
||||
setOperationAction(ISD::SUB, MVT::v32i8, Custom);
|
||||
|
||||
setOperationAction(ISD::MUL, MVT::v4i64, Custom);
|
||||
setOperationAction(ISD::MUL, MVT::v8i32, Custom);
|
||||
setOperationAction(ISD::MUL, MVT::v16i16, Custom);
|
||||
// Don't lower v32i8 because there is no 128-bit byte mul
|
||||
}
|
||||
|
||||
// Custom lower several nodes for 256-bit types.
|
||||
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
|
||||
@ -10004,12 +10021,55 @@ SDValue X86TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
|
||||
EVT VT = Op.getValueType();
|
||||
|
||||
// Decompose 256-bit ops into smaller 128-bit ops.
|
||||
if (VT.getSizeInBits() == 256)
|
||||
if (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2())
|
||||
return Lower256IntArith(Op, DAG);
|
||||
|
||||
assert(VT == MVT::v2i64 && "Only know how to lower V2I64 multiply");
|
||||
DebugLoc dl = Op.getDebugLoc();
|
||||
|
||||
SDValue A = Op.getOperand(0);
|
||||
SDValue B = Op.getOperand(1);
|
||||
|
||||
if (VT == MVT::v4i64) {
|
||||
assert(Subtarget->hasAVX2() && "Lowering v4i64 multiply requires AVX2");
|
||||
|
||||
// ulong4 Ahi = __builtin_ia32_psrlqi256( a, 32);
|
||||
// ulong4 Bhi = __builtin_ia32_psrlqi256( b, 32);
|
||||
// ulong4 AloBlo = __builtin_ia32_pmuludq256( a, b );
|
||||
// ulong4 AloBhi = __builtin_ia32_pmuludq256( a, Bhi );
|
||||
// ulong4 AhiBlo = __builtin_ia32_pmuludq256( Ahi, b );
|
||||
//
|
||||
// AloBhi = __builtin_ia32_psllqi256( AloBhi, 32 );
|
||||
// AhiBlo = __builtin_ia32_psllqi256( AhiBlo, 32 );
|
||||
// return AloBlo + AloBhi + AhiBlo;
|
||||
|
||||
SDValue Ahi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
|
||||
DAG.getConstant(Intrinsic::x86_avx2_psrli_q, MVT::i32),
|
||||
A, DAG.getConstant(32, MVT::i32));
|
||||
SDValue Bhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
|
||||
DAG.getConstant(Intrinsic::x86_avx2_psrli_q, MVT::i32),
|
||||
B, DAG.getConstant(32, MVT::i32));
|
||||
SDValue AloBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
|
||||
DAG.getConstant(Intrinsic::x86_avx2_pmulu_dq, MVT::i32),
|
||||
A, B);
|
||||
SDValue AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
|
||||
DAG.getConstant(Intrinsic::x86_avx2_pmulu_dq, MVT::i32),
|
||||
A, Bhi);
|
||||
SDValue AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
|
||||
DAG.getConstant(Intrinsic::x86_avx2_pmulu_dq, MVT::i32),
|
||||
Ahi, B);
|
||||
AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
|
||||
DAG.getConstant(Intrinsic::x86_avx2_pslli_q, MVT::i32),
|
||||
AloBhi, DAG.getConstant(32, MVT::i32));
|
||||
AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
|
||||
DAG.getConstant(Intrinsic::x86_avx2_pslli_q, MVT::i32),
|
||||
AhiBlo, DAG.getConstant(32, MVT::i32));
|
||||
SDValue Res = DAG.getNode(ISD::ADD, dl, VT, AloBlo, AloBhi);
|
||||
Res = DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
|
||||
return Res;
|
||||
}
|
||||
|
||||
assert(VT == MVT::v2i64 && "Only know how to lower V2I64 multiply");
|
||||
|
||||
// ulong2 Ahi = __builtin_ia32_psrlqi128( a, 32);
|
||||
// ulong2 Bhi = __builtin_ia32_psrlqi128( b, 32);
|
||||
// ulong2 AloBlo = __builtin_ia32_pmuludq128( a, b );
|
||||
@ -10020,9 +10080,6 @@ SDValue X86TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
|
||||
// AhiBlo = __builtin_ia32_psllqi128( AhiBlo, 32 );
|
||||
// return AloBlo + AloBhi + AhiBlo;
|
||||
|
||||
SDValue A = Op.getOperand(0);
|
||||
SDValue B = Op.getOperand(1);
|
||||
|
||||
SDValue Ahi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
|
||||
DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
|
||||
A, DAG.getConstant(32, MVT::i32));
|
||||
|
76
test/CodeGen/X86/avx2-arith.ll
Normal file
76
test/CodeGen/X86/avx2-arith.ll
Normal file
@ -0,0 +1,76 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
|
||||
|
||||
; CHECK: vpaddq %ymm
|
||||
; Element-wise add of two <4 x i64> vectors. The FileCheck directive just
; above expects the llc output to contain a vpaddq on %ymm registers.
define <4 x i64> @vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
|
||||
%x = add <4 x i64> %i, %j
|
||||
ret <4 x i64> %x
|
||||
}
|
||||
|
||||
; CHECK: vpaddd %ymm
|
||||
; Element-wise add of two <8 x i32> vectors. The FileCheck directive just
; above expects the llc output to contain a vpaddd on %ymm registers.
define <8 x i32> @vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
|
||||
%x = add <8 x i32> %i, %j
|
||||
ret <8 x i32> %x
|
||||
}
|
||||
|
||||
; CHECK: vpaddw %ymm
|
||||
; Element-wise add of two <16 x i16> vectors. The FileCheck directive just
; above expects the llc output to contain a vpaddw on %ymm registers.
define <16 x i16> @vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
|
||||
%x = add <16 x i16> %i, %j
|
||||
ret <16 x i16> %x
|
||||
}
|
||||
|
||||
; CHECK: vpaddb %ymm
|
||||
; Element-wise add of two <32 x i8> vectors. The FileCheck directive just
; above expects the llc output to contain a vpaddb on %ymm registers.
define <32 x i8> @vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
|
||||
%x = add <32 x i8> %i, %j
|
||||
ret <32 x i8> %x
|
||||
}
|
||||
|
||||
; CHECK: vpsubq %ymm
|
||||
; Element-wise subtract of two <4 x i64> vectors. The FileCheck directive just
; above expects the llc output to contain a vpsubq on %ymm registers.
define <4 x i64> @vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
|
||||
%x = sub <4 x i64> %i, %j
|
||||
ret <4 x i64> %x
|
||||
}
|
||||
|
||||
; CHECK: vpsubd %ymm
|
||||
; Element-wise subtract of two <8 x i32> vectors. The FileCheck directive just
; above expects the llc output to contain a vpsubd on %ymm registers.
define <8 x i32> @vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
|
||||
%x = sub <8 x i32> %i, %j
|
||||
ret <8 x i32> %x
|
||||
}
|
||||
|
||||
; CHECK: vpsubw %ymm
|
||||
; Element-wise subtract of two <16 x i16> vectors. The FileCheck directive just
; above expects the llc output to contain a vpsubw on %ymm registers.
define <16 x i16> @vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
|
||||
%x = sub <16 x i16> %i, %j
|
||||
ret <16 x i16> %x
|
||||
}
|
||||
|
||||
; CHECK: vpsubb %ymm
|
||||
; Element-wise subtract of two <32 x i8> vectors. The FileCheck directive just
; above expects the llc output to contain a vpsubb on %ymm registers.
define <32 x i8> @vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
|
||||
%x = sub <32 x i8> %i, %j
|
||||
ret <32 x i8> %x
|
||||
}
|
||||
|
||||
; CHECK: vpmulld %ymm
|
||||
; Element-wise multiply of two <8 x i32> vectors. The FileCheck directive just
; above expects the llc output to contain a vpmulld on %ymm registers.
define <8 x i32> @vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
|
||||
%x = mul <8 x i32> %i, %j
|
||||
ret <8 x i32> %x
|
||||
}
|
||||
|
||||
; CHECK: vpmullw %ymm
|
||||
; Element-wise multiply of two <16 x i16> vectors. The FileCheck directive just
; above expects the llc output to contain a vpmullw on %ymm registers.
define <16 x i16> @vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
|
||||
%x = mul <16 x i16> %i, %j
|
||||
ret <16 x i16> %x
|
||||
}
|
||||
|
||||
; CHECK: vpmuludq %ymm
|
||||
; CHECK-NEXT: vpsrlq $32, %ymm
|
||||
; CHECK-NEXT: vpmuludq %ymm
|
||||
; CHECK-NEXT: vpsllq $32, %ymm
|
||||
; CHECK-NEXT: vpaddq %ymm
|
||||
; CHECK-NEXT: vpsrlq $32, %ymm
|
||||
; CHECK-NEXT: vpmuludq %ymm
|
||||
; CHECK-NEXT: vpsllq $32, %ymm
|
||||
; CHECK-NEXT: vpaddq %ymm
|
||||
; Element-wise multiply of two <4 x i64> vectors. The FileCheck directives
; above expect this to be emitted as a consecutive sequence of vpmuludq,
; vpsrlq $32, vpsllq $32 and vpaddq instructions on %ymm registers rather than
; as a single multiply instruction.
define <4 x i64> @mul-v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
|
||||
%x = mul <4 x i64> %i, %j
|
||||
ret <4 x i64> %x
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user