mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-26 05:32:25 +00:00
For AArch64, lowering sext_inreg and generate optimized code by using SXTL.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199296 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
38e6f7301d
commit
b6db372c96
@ -286,6 +286,15 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
|
||||
setExceptionSelectorRegister(AArch64::X1);
|
||||
|
||||
if (Subtarget->hasNEON()) {
|
||||
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i8, Expand);
|
||||
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
|
||||
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
|
||||
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v1i64, Expand);
|
||||
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v16i8, Expand);
|
||||
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i16, Expand);
|
||||
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
|
||||
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Expand);
|
||||
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v1i8, Custom);
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
|
||||
setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
|
||||
@ -3574,7 +3583,25 @@ static bool isVShiftRImm(SDValue Op, EVT VT, int64_t &Cnt) {
|
||||
return (Cnt >= 1 && Cnt <= ElementBits);
|
||||
}
|
||||
|
||||
/// Checks for immediate versions of vector shifts and lowers them.
|
||||
static SDValue GenForSextInreg(SDNode *N,
|
||||
TargetLowering::DAGCombinerInfo &DCI,
|
||||
EVT SrcVT, EVT DestVT, EVT SubRegVT,
|
||||
const int *Mask, SDValue Src) {
|
||||
SelectionDAG &DAG = DCI.DAG;
|
||||
SDValue Bitcast
|
||||
= DAG.getNode(ISD::BITCAST, SDLoc(N), SrcVT, Src);
|
||||
SDValue Sext
|
||||
= DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), DestVT, Bitcast);
|
||||
SDValue ShuffleVec
|
||||
= DAG.getVectorShuffle(DestVT, SDLoc(N), Sext, DAG.getUNDEF(DestVT), Mask);
|
||||
SDValue ExtractSubreg
|
||||
= SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, SDLoc(N),
|
||||
SubRegVT, ShuffleVec,
|
||||
DAG.getTargetConstant(AArch64::sub_64, MVT::i32)), 0);
|
||||
return ExtractSubreg;
|
||||
}
|
||||
|
||||
/// Checks for vector shifts and lowers them.
|
||||
static SDValue PerformShiftCombine(SDNode *N,
|
||||
TargetLowering::DAGCombinerInfo &DCI,
|
||||
const AArch64Subtarget *ST) {
|
||||
@ -3583,6 +3610,51 @@ static SDValue PerformShiftCombine(SDNode *N,
|
||||
if (N->getOpcode() == ISD::SRA && (VT == MVT::i32 || VT == MVT::i64))
|
||||
return PerformSRACombine(N, DCI);
|
||||
|
||||
// We're looking for an SRA/SHL pair that lets us generate the instruction
|
||||
// sshll v0.8h, v0.8b, #0
|
||||
// The instruction SXTL is an alias of this instruction.
|
||||
//
|
||||
// For example, for DAG like below,
|
||||
// v2i32 = sra (v2i32 (shl v2i32, 16)), 16
|
||||
// we can transform it into
|
||||
// v2i32 = EXTRACT_SUBREG
|
||||
// (v4i32 (shuffle_vector
|
||||
// (v4i32 (sext (v4i16 (bitcast v2i32))),
|
||||
// undef, (0, 2, u, u)),
|
||||
// sub_64
|
||||
//
|
||||
// With this transformation we expect to generate "SSHLL + UZP1"
|
||||
// Sometimes UZP1 can be optimized away by combining with other context.
|
||||
int64_t ShrCnt, ShlCnt;
|
||||
if (N->getOpcode() == ISD::SRA
|
||||
&& (VT == MVT::v2i32 || VT == MVT::v4i16)
|
||||
&& isVShiftRImm(N->getOperand(1), VT, ShrCnt)
|
||||
&& N->getOperand(0).getOpcode() == ISD::SHL
|
||||
&& isVShiftRImm(N->getOperand(0).getOperand(1), VT, ShlCnt)) {
|
||||
SDValue Src = N->getOperand(0).getOperand(0);
|
||||
if (VT == MVT::v2i32 && ShrCnt == 16 && ShlCnt == 16) {
|
||||
// sext_inreg(v2i32, v2i16)
|
||||
// We essentially only care about the Mask {0, 2, u, u}
|
||||
int Mask[4] = {0, 2, 4, 6};
|
||||
return GenForSextInreg(N, DCI, MVT::v4i16, MVT::v4i32, MVT::v2i32,
|
||||
Mask, Src);
|
||||
}
|
||||
else if (VT == MVT::v2i32 && ShrCnt == 24 && ShlCnt == 24) {
|
||||
// sext_inreg(v2i32, v2i8)
|
||||
// We essentially only care about the Mask {0, u, 4, u, u, u, u, u}
|
||||
int Mask[8] = {0, 2, 4, 6, 8, 10, 12, 14};
|
||||
return GenForSextInreg(N, DCI, MVT::v8i8, MVT::v8i16, MVT::v2i32,
|
||||
Mask, Src);
|
||||
}
|
||||
else if (VT == MVT::v4i16 && ShrCnt == 8 && ShlCnt == 8) {
|
||||
// sext_inreg(v4i16, v4i8)
|
||||
// We essentially only care about the Mask {0, 2, 4, 6, u, u, u, u}
|
||||
int Mask[8] = {0, 2, 4, 6, 8, 10, 12, 14};
|
||||
return GenForSextInreg(N, DCI, MVT::v8i8, MVT::v8i16, MVT::v4i16,
|
||||
Mask, Src);
|
||||
}
|
||||
}
|
||||
|
||||
// Nothing to be done for scalar shifts.
|
||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
if (!VT.isVector() || !TLI.isTypeLegal(VT))
|
||||
|
@ -1877,6 +1877,10 @@ def UXTL2vv_16B : NeonI_ext_len_alias<"uxtl2", ".8h", ".16b", USHLLvvi_16B, VPR
|
||||
def UXTL2vv_8H : NeonI_ext_len_alias<"uxtl2", ".4s", ".8h", USHLLvvi_8H, VPR128, VPR128>;
|
||||
def UXTL2vv_4S : NeonI_ext_len_alias<"uxtl2", ".2d", ".4s", USHLLvvi_4S, VPR128, VPR128>;
|
||||
|
||||
def : Pat<(v8i16 (anyext (v8i8 VPR64:$Rn))), (USHLLvvi_8B VPR64:$Rn, 0)>;
|
||||
def : Pat<(v4i32 (anyext (v4i16 VPR64:$Rn))), (USHLLvvi_4H VPR64:$Rn, 0)>;
|
||||
def : Pat<(v2i64 (anyext (v2i32 VPR64:$Rn))), (USHLLvvi_2S VPR64:$Rn, 0)>;
|
||||
|
||||
// Rounding/Saturating shift
|
||||
class N2VShift_RQ<bit q, bit u, bits<5> opcode, string asmop, string T,
|
||||
RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
|
||||
|
198
test/CodeGen/AArch64/sext_inreg.ll
Normal file
198
test/CodeGen/AArch64/sext_inreg.ll
Normal file
@ -0,0 +1,198 @@
|
||||
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
|
||||
|
||||
; For formal arguments, we have the following vector type promotion,
|
||||
; v2i8 is promoted to v2i32(f64)
|
||||
; v2i16 is promoted to v2i32(f64)
|
||||
; v4i8 is promoted to v4i16(f64)
|
||||
; v8i1 is promoted to v8i16(f128)
|
||||
|
||||
define <2 x i8> @test_sext_inreg_v2i8i16(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
|
||||
; CHECK-LABEL: test_sext_inreg_v2i8i16
|
||||
; CHECK: sshll v0.8h, v0.8b, #0
|
||||
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h
|
||||
; CHECK-NEXT: sshll v1.8h, v1.8b, #0
|
||||
; CHECK-NEXT: uzp1 v1.8h, v1.8h, v1.8h
|
||||
%1 = sext <2 x i8> %v1 to <2 x i16>
|
||||
%2 = sext <2 x i8> %v2 to <2 x i16>
|
||||
%3 = shufflevector <2 x i16> %1, <2 x i16> %2, <2 x i32> <i32 0, i32 2>
|
||||
%4 = trunc <2 x i16> %3 to <2 x i8>
|
||||
ret <2 x i8> %4
|
||||
}
|
||||
|
||||
define <2 x i8> @test_sext_inreg_v2i8i16_2(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
|
||||
; CHECK-LABEL: test_sext_inreg_v2i8i16_2
|
||||
; CHECK: sshll v0.8h, v0.8b, #0
|
||||
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h
|
||||
; CHECK-NEXT: sshll v1.8h, v1.8b, #0
|
||||
; CHECK-NEXT: uzp1 v1.8h, v1.8h, v1.8h
|
||||
%a1 = shl <2 x i32> %v1, <i32 24, i32 24>
|
||||
%a2 = ashr <2 x i32> %a1, <i32 24, i32 24>
|
||||
%b1 = shl <2 x i32> %v2, <i32 24, i32 24>
|
||||
%b2 = ashr <2 x i32> %b1, <i32 24, i32 24>
|
||||
%c = shufflevector <2 x i32> %a2, <2 x i32> %b2, <2 x i32> <i32 0, i32 2>
|
||||
%d = trunc <2 x i32> %c to <2 x i8>
|
||||
ret <2 x i8> %d
|
||||
}
|
||||
|
||||
define <2 x i8> @test_sext_inreg_v2i8i32(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
|
||||
; CHECK-LABEL: test_sext_inreg_v2i8i32
|
||||
; CHECK: sshll v0.8h, v0.8b, #0
|
||||
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h
|
||||
; CHECK-NEXT: sshll v1.8h, v1.8b, #0
|
||||
; CHECK-NEXT: uzp1 v1.8h, v1.8h, v1.8h
|
||||
%1 = sext <2 x i8> %v1 to <2 x i32>
|
||||
%2 = sext <2 x i8> %v2 to <2 x i32>
|
||||
%3 = shufflevector <2 x i32> %1, <2 x i32> %2, <2 x i32> <i32 0, i32 2>
|
||||
%4 = trunc <2 x i32> %3 to <2 x i8>
|
||||
ret <2 x i8> %4
|
||||
}
|
||||
|
||||
define <2 x i8> @test_sext_inreg_v2i8i64(<2 x i8> %v1, <2 x i8> %v2) nounwind readnone {
|
||||
; CHECK-LABEL: test_sext_inreg_v2i8i64
|
||||
; CHECK: ushll v1.2d, v1.2s, #0
|
||||
; CHECK: ushll v0.2d, v0.2s, #0
|
||||
; CHECK: shl v0.2d, v0.2d, #56
|
||||
; CHECK: sshr v0.2d, v0.2d, #56
|
||||
; CHECK: shl v1.2d, v1.2d, #56
|
||||
; CHECK: sshr v1.2d, v1.2d, #56
|
||||
%1 = sext <2 x i8> %v1 to <2 x i64>
|
||||
%2 = sext <2 x i8> %v2 to <2 x i64>
|
||||
%3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
|
||||
%4 = trunc <2 x i64> %3 to <2 x i8>
|
||||
ret <2 x i8> %4
|
||||
}
|
||||
|
||||
define <4 x i8> @test_sext_inreg_v4i8i16(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
|
||||
; CHECK-LABEL: test_sext_inreg_v4i8i16
|
||||
; CHECK: sshll v0.8h, v0.8b, #0
|
||||
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h
|
||||
; CHECK-NEXT: sshll v1.8h, v1.8b, #0
|
||||
; CHECK-NEXT: uzp1 v1.8h, v1.8h, v1.8h
|
||||
%1 = sext <4 x i8> %v1 to <4 x i16>
|
||||
%2 = sext <4 x i8> %v2 to <4 x i16>
|
||||
%3 = shufflevector <4 x i16> %1, <4 x i16> %2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
%4 = trunc <4 x i16> %3 to <4 x i8>
|
||||
ret <4 x i8> %4
|
||||
}
|
||||
|
||||
define <4 x i8> @test_sext_inreg_v4i8i16_2(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
|
||||
; CHECK-LABEL: test_sext_inreg_v4i8i16_2
|
||||
; CHECK: sshll v0.8h, v0.8b, #0
|
||||
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h
|
||||
; CHECK-NEXT: sshll v1.8h, v1.8b, #0
|
||||
; CHECK-NEXT: uzp1 v1.8h, v1.8h, v1.8h
|
||||
%a1 = shl <4 x i16> %v1, <i16 8, i16 8, i16 8, i16 8>
|
||||
%a2 = ashr <4 x i16> %a1, <i16 8, i16 8, i16 8, i16 8>
|
||||
%b1 = shl <4 x i16> %v2, <i16 8, i16 8, i16 8, i16 8>
|
||||
%b2 = ashr <4 x i16> %b1, <i16 8, i16 8, i16 8, i16 8>
|
||||
%c = shufflevector <4 x i16> %a2, <4 x i16> %b2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
%d = trunc <4 x i16> %c to <4 x i8>
|
||||
ret <4 x i8> %d
|
||||
}
|
||||
|
||||
define <4 x i8> @test_sext_inreg_v4i8i32(<4 x i8> %v1, <4 x i8> %v2) nounwind readnone {
|
||||
; CHECK-LABEL: test_sext_inreg_v4i8i32
|
||||
; CHECK: ushll v1.4s, v1.4h, #0
|
||||
; CHECK: ushll v0.4s, v0.4h, #0
|
||||
; CHECK: shl v0.4s, v0.4s, #24
|
||||
; CHECK: sshr v0.4s, v0.4s, #24
|
||||
; CHECK: shl v1.4s, v1.4s, #24
|
||||
; CHECK: sshr v1.4s, v1.4s, #24
|
||||
%1 = sext <4 x i8> %v1 to <4 x i32>
|
||||
%2 = sext <4 x i8> %v2 to <4 x i32>
|
||||
%3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
%4 = trunc <4 x i32> %3 to <4 x i8>
|
||||
ret <4 x i8> %4
|
||||
}
|
||||
|
||||
define <8 x i8> @test_sext_inreg_v8i8i16(<8 x i8> %v1, <8 x i8> %v2) nounwind readnone {
|
||||
; CHECK-LABEL: test_sext_inreg_v8i8i16
|
||||
; CHECK: sshll v0.8h, v0.8b, #0
|
||||
; CHECK: sshll v1.8h, v1.8b, #0
|
||||
%1 = sext <8 x i8> %v1 to <8 x i16>
|
||||
%2 = sext <8 x i8> %v2 to <8 x i16>
|
||||
%3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
|
||||
%4 = trunc <8 x i16> %3 to <8 x i8>
|
||||
ret <8 x i8> %4
|
||||
}
|
||||
|
||||
define <8 x i1> @test_sext_inreg_v8i1i16(<8 x i1> %v1, <8 x i1> %v2) nounwind readnone {
|
||||
; CHECK-LABEL: test_sext_inreg_v8i1i16
|
||||
; CHECK: ushll v1.8h, v1.8b, #0
|
||||
; CHECK: ushll v0.8h, v0.8b, #0
|
||||
; CHECK: shl v0.8h, v0.8h, #15
|
||||
; CHECK: sshr v0.8h, v0.8h, #15
|
||||
; CHECK: shl v1.8h, v1.8h, #15
|
||||
; CHECK: sshr v1.8h, v1.8h, #15
|
||||
%1 = sext <8 x i1> %v1 to <8 x i16>
|
||||
%2 = sext <8 x i1> %v2 to <8 x i16>
|
||||
%3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
|
||||
%4 = trunc <8 x i16> %3 to <8 x i1>
|
||||
ret <8 x i1> %4
|
||||
}
|
||||
|
||||
define <2 x i16> @test_sext_inreg_v2i16i32(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
|
||||
; CHECK-LABEL: test_sext_inreg_v2i16i32
|
||||
; CHECK: sshll v0.4s, v0.4h, #0
|
||||
; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s
|
||||
; CHECK-NEXT: sshll v1.4s, v1.4h, #0
|
||||
; CHECK-NEXT: uzp1 v1.4s, v1.4s, v1.4s
|
||||
%1 = sext <2 x i16> %v1 to <2 x i32>
|
||||
%2 = sext <2 x i16> %v2 to <2 x i32>
|
||||
%3 = shufflevector <2 x i32> %1, <2 x i32> %2, <2 x i32> <i32 0, i32 2>
|
||||
%4 = trunc <2 x i32> %3 to <2 x i16>
|
||||
ret <2 x i16> %4
|
||||
}
|
||||
|
||||
define <2 x i16> @test_sext_inreg_v2i16i32_2(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
|
||||
; CHECK-LABEL: test_sext_inreg_v2i16i32_2
|
||||
; CHECK: sshll v0.4s, v0.4h, #0
|
||||
; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s
|
||||
; CHECK-NEXT: sshll v1.4s, v1.4h, #0
|
||||
; CHECK-NEXT: uzp1 v1.4s, v1.4s, v1.4s
|
||||
%a1 = shl <2 x i32> %v1, <i32 16, i32 16>
|
||||
%a2 = ashr <2 x i32> %a1, <i32 16, i32 16>
|
||||
%b1 = shl <2 x i32> %v2, <i32 16, i32 16>
|
||||
%b2 = ashr <2 x i32> %b1, <i32 16, i32 16>
|
||||
%c = shufflevector <2 x i32> %a2, <2 x i32> %b2, <2 x i32> <i32 0, i32 2>
|
||||
%d = trunc <2 x i32> %c to <2 x i16>
|
||||
ret <2 x i16> %d
|
||||
}
|
||||
|
||||
define <2 x i16> @test_sext_inreg_v2i16i64(<2 x i16> %v1, <2 x i16> %v2) nounwind readnone {
|
||||
; CHECK-LABEL: test_sext_inreg_v2i16i64
|
||||
; CHECK: ushll v1.2d, v1.2s, #0
|
||||
; CHECK: ushll v0.2d, v0.2s, #0
|
||||
; CHECK: shl v0.2d, v0.2d, #48
|
||||
; CHECK: sshr v0.2d, v0.2d, #48
|
||||
; CHECK: shl v1.2d, v1.2d, #48
|
||||
; CHECK: sshr v1.2d, v1.2d, #48
|
||||
%1 = sext <2 x i16> %v1 to <2 x i64>
|
||||
%2 = sext <2 x i16> %v2 to <2 x i64>
|
||||
%3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
|
||||
%4 = trunc <2 x i64> %3 to <2 x i16>
|
||||
ret <2 x i16> %4
|
||||
}
|
||||
|
||||
define <4 x i16> @test_sext_inreg_v4i16i32(<4 x i16> %v1, <4 x i16> %v2) nounwind readnone {
|
||||
; CHECK-LABEL: test_sext_inreg_v4i16i32
|
||||
; CHECK: sshll v0.4s, v0.4h, #0
|
||||
; CHECK: sshll v1.4s, v1.4h, #0
|
||||
%1 = sext <4 x i16> %v1 to <4 x i32>
|
||||
%2 = sext <4 x i16> %v2 to <4 x i32>
|
||||
%3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
%4 = trunc <4 x i32> %3 to <4 x i16>
|
||||
ret <4 x i16> %4
|
||||
}
|
||||
|
||||
define <2 x i32> @test_sext_inreg_v2i32i64(<2 x i32> %v1, <2 x i32> %v2) nounwind readnone {
|
||||
; CHECK-LABEL: test_sext_inreg_v2i32i64
|
||||
; CHECK: sshll v0.2d, v0.2s, #0
|
||||
; CHECK: sshll v1.2d, v1.2s, #0
|
||||
%1 = sext <2 x i32> %v1 to <2 x i64>
|
||||
%2 = sext <2 x i32> %v2 to <2 x i64>
|
||||
%3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
|
||||
%4 = trunc <2 x i64> %3 to <2 x i32>
|
||||
ret <2 x i32> %4
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user