From 5ca793561ed7cb19308dcf48b0f8d363e3c8c1df Mon Sep 17 00:00:00 2001 From: David Xu Date: Thu, 28 Aug 2014 04:59:53 +0000 Subject: [PATCH] Generate CMN when comparing a short int with minus git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216651 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64ISelLowering.cpp | 44 +++++++++++++++++++-- test/CodeGen/AArch64/cmpwithshort.ll | 46 ++++++++++++++++++++++ 2 files changed, 87 insertions(+), 3 deletions(-) create mode 100644 test/CodeGen/AArch64/cmpwithshort.ll diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 4c12f49ad04..bcc8d05c8c7 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1117,6 +1117,8 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &AArch64cc, SelectionDAG &DAG, SDLoc dl) { + SDValue Cmp; + AArch64CC::CondCode AArch64CC; if (ConstantSDNode *RHSC = dyn_cast(RHS.getNode())) { EVT VT = RHS.getValueType(); uint64_t C = RHSC->getZExtValue(); @@ -1171,9 +1173,45 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, } } } - - SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG); - AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC); + // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095. + // For the i8 operand, the largest immediate is 255, so this can be easily + // encoded in the compare instruction. For the i16 operand, however, the + // largest immediate cannot be encoded in the compare. + // Therefore, use a sign extending load and cmn to avoid materializing the -1 + // constant. 
For example, + // movz w1, #65535 + // ldrh w0, [x0, #0] + // cmp w0, w1 + // > + // ldrsh w0, [x0, #0] + // cmn w0, #1 + // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS) + // if and only if (sext LHS) == (sext RHS). The checks are in place to ensure + // both the LHS and RHS are truly zero extended and to make sure the + // transformation is profitable. + if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa(RHS)) { + if ((cast(RHS)->getZExtValue() >> 16 == 0) && + isa(LHS)) { + if (cast(LHS)->getExtensionType() == ISD::ZEXTLOAD && + cast(LHS)->getMemoryVT() == MVT::i16 && + LHS.getNode()->hasNUsesOfValue(1, 0)) { + int16_t ValueofRHS = cast(RHS)->getZExtValue(); + if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) { + SDValue SExt = + DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS, + DAG.getValueType(MVT::i16)); + Cmp = emitComparison(SExt, + DAG.getConstant(ValueofRHS, RHS.getValueType()), + CC, dl, DAG); + AArch64CC = changeIntCCToAArch64CC(CC); + AArch64cc = DAG.getConstant(AArch64CC, MVT::i32); + return Cmp; + } + } + } + } + Cmp = emitComparison(LHS, RHS, CC, dl, DAG); + AArch64CC = changeIntCCToAArch64CC(CC); AArch64cc = DAG.getConstant(AArch64CC, MVT::i32); return Cmp; } diff --git a/test/CodeGen/AArch64/cmpwithshort.ll b/test/CodeGen/AArch64/cmpwithshort.ll new file mode 100644 index 00000000000..14efdcc9d18 --- /dev/null +++ b/test/CodeGen/AArch64/cmpwithshort.ll @@ -0,0 +1,46 @@ +; RUN: llc -O3 -march=aarch64 < %s | FileCheck %s + +define i16 @test_1cmp_signed_1(i16* %ptr1) { +; CHECK-LABEL: @test_1cmp_signed_1 +; CHECK: ldrsh +; CHECK-NEXT: cmn +entry: + %addr = getelementptr inbounds i16* %ptr1, i16 0 + %val = load i16* %addr, align 2 + %cmp = icmp eq i16 %val, -1 + br i1 %cmp, label %if, label %if.then +if: + ret i16 1 +if.then: + ret i16 0 +} + +define i16 @test_1cmp_signed_2(i16* %ptr1) { +; CHECK-LABEL: @test_1cmp_signed_2 +; CHECK: ldrsh +; CHECK-NEXT: cmn +entry: + %addr = getelementptr inbounds i16* 
%ptr1, i16 0 + %val = load i16* %addr, align 2 + %cmp = icmp sge i16 %val, -1 + br i1 %cmp, label %if, label %if.then +if: + ret i16 1 +if.then: + ret i16 0 +} + +define i16 @test_1cmp_unsigned_1(i16* %ptr1) { +; CHECK-LABEL: @test_1cmp_unsigned_1 +; CHECK: ldrsh +; CHECK-NEXT: cmn +entry: + %addr = getelementptr inbounds i16* %ptr1, i16 0 + %val = load i16* %addr, align 2 + %cmp = icmp uge i16 %val, -1 + br i1 %cmp, label %if, label %if.then +if: + ret i16 1 +if.then: + ret i16 0 +}