From 0a9ff8776b5fcffdb5292f261da04085ea8e116f Mon Sep 17 00:00:00 2001
From: Kevin Qin
Date: Wed, 18 Dec 2013 06:26:04 +0000
Subject: [PATCH] [AArch64 NEON] Implement loading vector constant from constant pool.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@197551 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AArch64/AArch64ISelDAGToDAG.cpp |  9 ---
 lib/Target/AArch64/AArch64ISelLowering.cpp | 32 +++++++++
 lib/Target/AArch64/AArch64ISelLowering.h   |  2 +
 lib/Target/AArch64/AArch64InstrInfo.td     |  1 +
 lib/Target/AArch64/AArch64InstrNEON.td     | 44 ++++++++++++
 test/CodeGen/AArch64/neon-mov.ll           |  4 +-
 test/CodeGen/AArch64/neon-simd-ldst-one.ll | 81 ++++++++++++++++++++++
 7 files changed, 163 insertions(+), 10 deletions(-)

diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index ef99541c170..dac4b32cfec 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -1113,15 +1113,6 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
     return CurDAG->SelectNodeTo(Node, AArch64::ADDxxi_lsl0_s, PtrTy, TFI,
                                 CurDAG->getTargetConstant(0, PtrTy));
   }
-  case ISD::ConstantPool: {
-    // Constant pools are fine, just create a Target entry.
-    ConstantPoolSDNode *CN = cast<ConstantPoolSDNode>(Node);
-    const Constant *C = CN->getConstVal();
-    SDValue CP = CurDAG->getTargetConstantPool(C, CN->getValueType(0));
-
-    ReplaceUses(SDValue(Node, 0), CP);
-    return NULL;
-  }
   case ISD::Constant: {
     SDNode *ResNode = 0;
     if (cast<ConstantSDNode>(Node)->getZExtValue() == 0) {
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 3266fc2dcc3..1b75d0571ae 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -140,6 +140,7 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
   setOperationAction(ISD::VAARG, MVT::Other, Expand);
 
   setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
+  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
 
   setOperationAction(ISD::ROTL, MVT::i32, Expand);
   setOperationAction(ISD::ROTL, MVT::i64, Expand);
@@ -2268,6 +2269,36 @@ AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
   }
 }
 
+SDValue
+AArch64TargetLowering::LowerConstantPool(SDValue Op,
+                                         SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  EVT PtrVT = getPointerTy();
+  ConstantPoolSDNode *CN = cast<ConstantPoolSDNode>(Op);
+  const Constant *C = CN->getConstVal();
+
+  switch(getTargetMachine().getCodeModel()) {
+  case CodeModel::Small:
+    // The most efficient code is PC-relative anyway for the small memory model,
+    // so we don't need to worry about relocation model.
+    return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
+                       DAG.getTargetConstantPool(C, PtrVT, 0, 0,
+                                                 AArch64II::MO_NO_FLAG),
+                       DAG.getTargetConstantPool(C, PtrVT, 0, 0,
+                                                 AArch64II::MO_LO12),
+                       DAG.getConstant(CN->getAlignment(), MVT::i32));
+  case CodeModel::Large:
+    return DAG.getNode(
+        AArch64ISD::WrapperLarge, DL, PtrVT,
+        DAG.getTargetConstantPool(C, PtrVT, 0, 0, AArch64II::MO_ABS_G3),
+        DAG.getTargetConstantPool(C, PtrVT, 0, 0, AArch64II::MO_ABS_G2_NC),
+        DAG.getTargetConstantPool(C, PtrVT, 0, 0, AArch64II::MO_ABS_G1_NC),
+        DAG.getTargetConstantPool(C, PtrVT, 0, 0, AArch64II::MO_ABS_G0_NC));
+  default:
+    llvm_unreachable("Only small and large code models supported now");
+  }
+}
+
 SDValue AArch64TargetLowering::LowerTLSDescCall(SDValue SymAddr,
                                                 SDValue DescAddr,
                                                 SDLoc DL,
@@ -2898,6 +2929,7 @@ AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::BRCOND: return LowerBRCOND(Op, DAG);
   case ISD::BR_CC: return LowerBR_CC(Op, DAG);
   case ISD::GlobalAddress: return LowerGlobalAddressELF(Op, DAG);
+  case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
   case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
   case ISD::JumpTable: return LowerJumpTable(Op, DAG);
   case ISD::SELECT: return LowerSELECT(Op, DAG);
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index bb0523a2bd3..358b5a1b216 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -306,6 +306,8 @@ public:
   SDValue LowerGlobalAddressELFLarge(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const;
 
+  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+
   SDValue LowerTLSDescCall(SDValue SymAddr, SDValue DescAddr, SDLoc DL,
                            SelectionDAG &DAG) const;
   SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 23d81fc478e..4c35b466f14 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4539,6 +4539,7 @@ def : ADRP_ADD;
 def : ADRP_ADD;
 def : ADRP_ADD;
 def : ADRP_ADD;
+def : ADRP_ADD<A64WrapperSmall, tconstpool>;
 
 //===----------------------------------------------------------------------===//
 // GOT access patterns
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index fbf1a471683..3ddeed4d93a 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -69,6 +69,50 @@ def SDT_assertext : SDTypeProfile<1, 1,
 def assertsext : SDNode<"ISD::AssertSext", SDT_assertext>;
 def assertzext : SDNode<"ISD::AssertZext", SDT_assertext>;
 
+//===----------------------------------------------------------------------===//
+// Addressing-mode instantiations
+//===----------------------------------------------------------------------===//
+
+multiclass ls_64_pats<dag address, dag Base, dag Offset, ValueType Ty> {
+defm : ls_neutral_pats<LSFP64_LDR, LSFP64_STR, Base,
+                       !foreach(decls.pattern, Offset,
+                                !subst(OFFSET, dword_uimm12, decls.pattern)),
+                       !foreach(decls.pattern, address,
+                                !subst(OFFSET, dword_uimm12,
+                                !subst(ALIGN, min_align8, decls.pattern))),
+                       Ty>;
+}
+
+multiclass ls_128_pats<dag address, dag Base, dag Offset, ValueType Ty> {
+defm : ls_neutral_pats<LSFP128_LDR, LSFP128_STR, Base,
+                       !foreach(decls.pattern, Offset,
+                                !subst(OFFSET, qword_uimm12, decls.pattern)),
+                       !foreach(decls.pattern, address,
+                                !subst(OFFSET, qword_uimm12,
+                                !subst(ALIGN, min_align16, decls.pattern))),
+                       Ty>;
+}
+
+multiclass uimm12_neon_pats<dag address, dag Base, dag Offset> {
+  defm : ls_64_pats<address, Base, Offset, v8i8>;
+  defm : ls_64_pats<address, Base, Offset, v4i16>;
+  defm : ls_64_pats<address, Base, Offset, v2i32>;
+  defm : ls_64_pats<address, Base, Offset, v1i64>;
+  defm : ls_64_pats<address, Base, Offset, v2f32>;
+  defm : ls_64_pats<address, Base, Offset, v1f64>;
+
+  defm : ls_128_pats<address, Base, Offset, v16i8>;
+  defm : ls_128_pats<address, Base, Offset, v8i16>;
+  defm : ls_128_pats<address, Base, Offset, v4i32>;
+  defm : ls_128_pats<address, Base, Offset, v2i64>;
+  defm : ls_128_pats<address, Base, Offset, v4f32>;
+  defm : ls_128_pats<address, Base, Offset, v2f64>;
+}
+
+defm : uimm12_neon_pats<(A64WrapperSmall
+                          tconstpool:$Hi, tconstpool:$Lo12, ALIGN),
+                        (ADRPxi tconstpool:$Hi), (i64 tconstpool:$Lo12)>;
+
//===----------------------------------------------------------------------===// // Multiclasses //===----------------------------------------------------------------------===// diff --git a/test/CodeGen/AArch64/neon-mov.ll b/test/CodeGen/AArch64/neon-mov.ll index 2fe91310072..4035b914b56 100644 --- a/test/CodeGen/AArch64/neon-mov.ll +++ b/test/CodeGen/AArch64/neon-mov.ll @@ -210,7 +210,9 @@ define <2 x i32> @movi1d_1() { declare <2 x i32> @test_movi1d(<2 x i32>, <2 x i32>) define <2 x i32> @movi1d() { -; CHECK: movi d1, #0xffffffff0000 +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK-NEXT: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] +; CHECK-NEXT: movi d1, #0xffffffff0000 %1 = tail call <2 x i32> @test_movi1d(<2 x i32> , <2 x i32> ) ret <2 x i32> %1 } diff --git a/test/CodeGen/AArch64/neon-simd-ldst-one.ll b/test/CodeGen/AArch64/neon-simd-ldst-one.ll index 3f28320f23d..9d49d2358fa 100644 --- a/test/CodeGen/AArch64/neon-simd-ldst-one.ll +++ b/test/CodeGen/AArch64/neon-simd-ldst-one.ll @@ -37,6 +37,87 @@ %struct.float32x2x4_t = type { [4 x <2 x float>] } %struct.float64x1x4_t = type { [4 x <1 x double>] } +define <16 x i8> @test_ld_from_poll_v16i8(<16 x i8> %a) { +; CHECK-LABEL: test_ld_from_poll_v16i8 +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = add <16 x i8> %a, + ret <16 x i8> %b +} + +define <8 x i16> @test_ld_from_poll_v8i16(<8 x i16> %a) { +; CHECK-LABEL: test_ld_from_poll_v8i16 +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = add <8 x i16> %a, + ret <8 x i16> %b +} + +define <4 x i32> @test_ld_from_poll_v4i32(<4 x i32> %a) { +; CHECK-LABEL: test_ld_from_poll_v4i32 +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = add <4 x i32> %a, + ret <4 x i32> %b +} + +define <2 x i64> @test_ld_from_poll_v2i64(<2 x i64> %a) { +; CHECK-LABEL: test_ld_from_poll_v2i64 +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = add <2 x i64> %a, + ret <2 x i64> %b +} + +define <4 x float> @test_ld_from_poll_v4f32(<4 x float> %a) { +; CHECK-LABEL: test_ld_from_poll_v4f32 +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = fadd <4 x float> %a, + ret <4 x float> %b +} + +define <2 x double> @test_ld_from_poll_v2f64(<2 x double> %a) { +; CHECK-LABEL: test_ld_from_poll_v2f64 +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK-NEXT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = fadd <2 x double> %a, + ret <2 x double> %b +} + +define <8 x i8> @test_ld_from_poll_v8i8(<8 x i8> %a) { +; CHECK-LABEL: test_ld_from_poll_v8i8 +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK-NEXT: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = add <8 x i8> %a, + ret <8 x i8> %b +} + +define <4 x i16> @test_ld_from_poll_v4i16(<4 x i16> %a) { +; CHECK-LABEL: test_ld_from_poll_v4i16 +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK-NEXT: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = add <4 x i16> %a, + ret <4 x i16> %b +} + +define <2 x i32> @test_ld_from_poll_v2i32(<2 x i32> %a) { +; CHECK-LABEL: test_ld_from_poll_v2i32 +; CHECK: adrp {{x[0-9]+}}, .{{[A-Z0-9_]+}} +; CHECK-NEXT: ldr {{d[0-9]+}}, [{{x[0-9]+}}, #:lo12:.{{[A-Z0-9_]+}}] +entry: + %b = add <2 x i32> 
%a, + ret <2 x i32> %b +} + define <16 x i8> @test_vld1q_dup_s8(i8* %a) { ; CHECK-LABEL: test_vld1q_dup_s8 ; CHECK: ld1r {{{v[0-9]+}}.16b}, [x0]