From fe5c9cee80cea161963673083ca63cfd6d031bac Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Wed, 9 Apr 2014 20:43:40 +0000 Subject: [PATCH] [AArch64] Implement the isTruncateFree API. In AArch64, an i64 to i32 truncate operation is a subregister access. This allows more opportunities for LSR optimization to eliminate variables of different types (i32 and i64). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205925 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 21 ++++++++++++ lib/Target/AArch64/AArch64ISelLowering.h | 4 +++ test/CodeGen/AArch64/eliminate-trunc.ll | 38 ++++++++++++++++++++++ 3 files changed, 63 insertions(+) create mode 100644 test/CodeGen/AArch64/eliminate-trunc.ll diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 2edb19281de..7accadc6a0f 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -5368,3 +5368,24 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, return false; } + +// Truncations from 64-bit GPR to 32-bit GPR are free. 
+bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { + if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) + return false; + unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); + unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); + if (NumBits1 <= NumBits2) + return false; + return true; +} + +bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { + if (!VT1.isInteger() || !VT2.isInteger()) + return false; + unsigned NumBits1 = VT1.getSizeInBits(); + unsigned NumBits2 = VT2.getSizeInBits(); + if (NumBits1 <= NumBits2) + return false; + return true; +} diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index e946b250e3f..6a4b0c791fe 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -277,6 +277,10 @@ public: SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; bool isLegalICmpImmediate(int64_t Val) const; + + bool isTruncateFree(Type *Ty1, Type *Ty2) const override; + bool isTruncateFree(EVT VT1, EVT VT2) const override; + SDValue getSelectableIntSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &A64cc, SelectionDAG &DAG, SDLoc &dl) const; diff --git a/test/CodeGen/AArch64/eliminate-trunc.ll b/test/CodeGen/AArch64/eliminate-trunc.ll new file mode 100644 index 00000000000..b8b47fd5787 --- /dev/null +++ b/test/CodeGen/AArch64/eliminate-trunc.ll @@ -0,0 +1,38 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s + +; Check trunc i64 operation is translated as a subregister access +; eliminating an i32 induction variable. 
+; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #1 +; CHECK-NOT: add {{w[0-9]+}}, {{w[0-9]+}}, #1 +; CHECK-NEXT: cmp {{w[0-9]+}}, {{w[0-9]+}}, uxtw +define void @test1_signed([8 x i8]* nocapture %a, i8* nocapture readonly %box, i8 %limit) { +entry: + %conv = zext i8 %limit to i32 + %cmp223 = icmp eq i8 %limit, 0 + br i1 %cmp223, label %for.end15, label %for.body4.lr.ph.us + +for.body4.us: + %indvars.iv = phi i64 [ 0, %for.body4.lr.ph.us ], [ %indvars.iv.next, %for.body4.us ] + %arrayidx6.us = getelementptr inbounds [8 x i8]* %a, i64 %indvars.iv26, i64 %indvars.iv + %0 = load i8* %arrayidx6.us, align 1 + %idxprom7.us = zext i8 %0 to i64 + %arrayidx8.us = getelementptr inbounds i8* %box, i64 %idxprom7.us + %1 = load i8* %arrayidx8.us, align 1 + store i8 %1, i8* %arrayidx6.us, align 1 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %2 = trunc i64 %indvars.iv.next to i32 + %cmp2.us = icmp slt i32 %2, %conv + br i1 %cmp2.us, label %for.body4.us, label %for.cond1.for.inc13_crit_edge.us + +for.body4.lr.ph.us: + %indvars.iv26 = phi i64 [ %indvars.iv.next27, %for.cond1.for.inc13_crit_edge.us ], [ 0, %entry ] + br label %for.body4.us + +for.cond1.for.inc13_crit_edge.us: + %indvars.iv.next27 = add nuw nsw i64 %indvars.iv26, 1 + %exitcond28 = icmp eq i64 %indvars.iv26, 3 + br i1 %exitcond28, label %for.end15, label %for.body4.lr.ph.us + +for.end15: + ret void +}