From 65b7f3af76d0ba5bce49b56ab3e18f970b95f9d1 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Wed, 21 Oct 2009 20:44:34 +0000 Subject: [PATCH] Improve handling of immediates by splitting 32-bit immediates into two 16-bit immediate operands when they will fit into the using instruction. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@84778 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMAddressingModes.h | 60 +++++++++++++++++++++++++++++ lib/Target/ARM/ARMInstrInfo.td | 6 +++ lib/Target/ARM/ARMInstrThumb2.td | 33 ++++++++++++++++ test/CodeGen/Thumb2/thumb2-mov.ll | 56 +++++++++++++++------------ test/CodeGen/Thumb2/thumb2-mov2.ll | 4 +- 5 files changed, 132 insertions(+), 27 deletions(-) diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h index edb74635a19..c603708652f 100644 --- a/lib/Target/ARM/ARMAddressingModes.h +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -341,6 +341,66 @@ namespace ARM_AM { return -1; } + static inline unsigned getT2SOImmValRotate(unsigned V) { + if ((V & ~255U) == 0) return 0; + // Use CTZ to compute the rotate amount. + unsigned RotAmt = CountTrailingZeros_32(V); + return (32 - RotAmt) & 31; + } + + static inline bool isT2SOImmTwoPartVal (unsigned Imm) { + unsigned V = Imm; + // Passing values can be any combination of splat values and shifter + // values. If this can be handled with a single shifter or splat, bail + // out. Those should be handled directly, not with a two-part val. + if (getT2SOImmValSplatVal(V) != -1) + return false; + V = rotr32 (~255U, getT2SOImmValRotate(V)) & V; + if (V == 0) + return false; + + // If this can be handled as an immediate, accept. + if (getT2SOImmVal(V) != -1) return true; + + // Likewise, try masking out a splat value first. + V = Imm; + if (getT2SOImmValSplatVal(V & 0xff00ff00U) != -1) + V &= ~0xff00ff00U; + else if (getT2SOImmValSplatVal(V & 0x00ff00ffU) != -1) + V &= ~0x00ff00ffU; + // If what's left can be handled as an immediate, accept. 
+ if (getT2SOImmVal(V) != -1) return true; + + // Otherwise, do not accept. + return false; + } + + static inline unsigned getT2SOImmTwoPartFirst(unsigned Imm) { + assert (isT2SOImmTwoPartVal(Imm) && + "Immedate cannot be encoded as two part immediate!"); + // Try a shifter operand as one part + unsigned V = rotr32 (~255, getT2SOImmValRotate(Imm)) & Imm; + // If the rest is encodable as an immediate, then return it. + if (getT2SOImmVal(V) != -1) return V; + + // Try masking out a splat value first. + if (getT2SOImmValSplatVal(Imm & 0xff00ff00U) != -1) + return Imm & 0xff00ff00U; + + // The other splat is all that's left as an option. + assert (getT2SOImmValSplatVal(Imm & 0x00ff00ffU) != -1); + return Imm & 0x00ff00ffU; + } + + static inline unsigned getT2SOImmTwoPartSecond(unsigned Imm) { + // Mask out the first hunk + Imm ^= getT2SOImmTwoPartFirst(Imm); + // Return what's left + assert (getT2SOImmVal(Imm) != -1 && + "Unable to encode second part of T2 two part SO immediate"); + return Imm; + } + //===--------------------------------------------------------------------===// // Addressing Mode #2 diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index e6a0bfaa8d9..384b98cf540 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -1583,6 +1583,12 @@ def : ARMPat<(or GPR:$LHS, so_imm2part:$RHS), def : ARMPat<(xor GPR:$LHS, so_imm2part:$RHS), (EORri (EORri GPR:$LHS, (so_imm2part_1 imm:$RHS)), (so_imm2part_2 imm:$RHS))>; +def : ARMPat<(add GPR:$LHS, so_imm2part:$RHS), + (ADDri (ADDri GPR:$LHS, (so_imm2part_1 imm:$RHS)), + (so_imm2part_2 imm:$RHS))>; +def : ARMPat<(sub GPR:$LHS, so_imm2part:$RHS), + (SUBri (SUBri GPR:$LHS, (so_imm2part_1 imm:$RHS)), + (so_imm2part_2 imm:$RHS))>; // 32-bit immediate using movw + movt. 
// This is a single pseudo instruction, the benefit is that it can be remat'd diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index b151c99cdc2..2b6fa98ed3c 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -69,6 +69,25 @@ def t2_so_imm_neg : Operand<i32>, return ARM_AM::getT2SOImmVal(-((int)N->getZExtValue())) != -1; }], t2_so_imm_neg_XFORM>; +// Break t2_so_imm's up into two pieces. This handles immediates with up to 16 +// bits set in them. This uses t2_so_imm2part to match and t2_so_imm2part_[12] +// to get the first/second pieces. +def t2_so_imm2part : Operand<i32>, + PatLeaf<(imm), [{ + return ARM_AM::isT2SOImmTwoPartVal((unsigned)N->getZExtValue()); + }]> { +} + +def t2_so_imm2part_1 : SDNodeXForm<imm, [{ + unsigned V = ARM_AM::getT2SOImmTwoPartFirst((unsigned)N->getZExtValue()); + return CurDAG->getTargetConstant(V, MVT::i32); +}]>; + +def t2_so_imm2part_2 : SDNodeXForm<imm, [{ + unsigned V = ARM_AM::getT2SOImmTwoPartSecond((unsigned)N->getZExtValue()); + return CurDAG->getTargetConstant(V, MVT::i32); +}]>; + /// imm1_31 predicate - True if the 32-bit immediate is in the range [1,31]. def imm1_31 : PatLeaf<(i32 imm), [{ return (int32_t)N->getZExtValue() >= 1 && (int32_t)N->getZExtValue() < 32; @@ -1131,6 +1150,20 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), // Non-Instruction Patterns // +// Two piece so_imms. 
+def : T2Pat<(or GPR:$LHS, t2_so_imm2part:$RHS), + (t2ORRri (t2ORRri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), + (t2_so_imm2part_2 imm:$RHS))>; +def : T2Pat<(xor GPR:$LHS, t2_so_imm2part:$RHS), + (t2EORri (t2EORri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), + (t2_so_imm2part_2 imm:$RHS))>; +def : T2Pat<(add GPR:$LHS, t2_so_imm2part:$RHS), + (t2ADDri (t2ADDri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), + (t2_so_imm2part_2 imm:$RHS))>; +def : T2Pat<(sub GPR:$LHS, t2_so_imm2part:$RHS), + (t2SUBri (t2SUBri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), + (t2_so_imm2part_2 imm:$RHS))>; + // ConstantPool, GlobalAddress, and JumpTable def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>; def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>; diff --git a/test/CodeGen/Thumb2/thumb2-mov.ll b/test/CodeGen/Thumb2/thumb2-mov.ll index e9fdec8820e..8606e327a63 100644 --- a/test/CodeGen/Thumb2/thumb2-mov.ll +++ b/test/CodeGen/Thumb2/thumb2-mov.ll @@ -10,29 +10,32 @@ define i32 @t2_const_var2_1_ok_1(i32 %lhs) { ret i32 %ret } -define i32 @t2_const_var2_1_fail_1(i32 %lhs) { -;CHECK: t2_const_var2_1_fail_1: -;CHECK: movt +define i32 @t2_const_var2_1_ok_2(i32 %lhs) { +;CHECK: t2_const_var2_1_ok_2: +;CHECK: #11206656 +;CHECK: #187 %ret = add i32 %lhs, 11206843 ; 0x00ab00bb ret i32 %ret } -define i32 @t2_const_var2_1_fail_2(i32 %lhs) { -;CHECK: t2_const_var2_1_fail_2: -;CHECK: movt +define i32 @t2_const_var2_1_ok_3(i32 %lhs) { +;CHECK: t2_const_var2_1_ok_3: +;CHECK: #11206827 +;CHECK: #16777216 %ret = add i32 %lhs, 27984043 ; 0x01ab00ab ret i32 %ret } -define i32 @t2_const_var2_1_fail_3(i32 %lhs) { -;CHECK: t2_const_var2_1_fail_3: -;CHECK: movt +define i32 @t2_const_var2_1_ok_4(i32 %lhs) { +;CHECK: t2_const_var2_1_ok_4: +;CHECK: #16777472 +;CHECK: #11206827 %ret = add i32 %lhs, 27984299 ; 0x01ab01ab ret i32 %ret } -define i32 @t2_const_var2_1_fail_4(i32 %lhs) { -;CHECK: t2_const_var2_1_fail_4: +define i32 @t2_const_var2_1_fail_1(i32 %lhs) { +;CHECK: 
t2_const_var2_1_fail_1: ;CHECK: movt %ret = add i32 %lhs, 28027649 ; 0x01abab01 ret i32 %ret @@ -46,29 +49,31 @@ define i32 @t2_const_var2_2_ok_1(i32 %lhs) { ret i32 %ret } -define i32 @t2_const_var2_2_fail_1(i32 %lhs) { -;CHECK: t2_const_var2_2_fail_1: -;CHECK: movt +define i32 @t2_const_var2_2_ok_2(i32 %lhs) { +;CHECK: t2_const_var2_2_ok_2: +;CHECK: #-1426063360 +;CHECK: #47616 %ret = add i32 %lhs, 2868951552 ; 0xab00ba00 ret i32 %ret } -define i32 @t2_const_var2_2_fail_2(i32 %lhs) { -;CHECK: t2_const_var2_2_fail_2: -;CHECK: movt +define i32 @t2_const_var2_2_ok_3(i32 %lhs) { +;CHECK: t2_const_var2_2_ok_3: +;CHECK: #-1426019584 %ret = add i32 %lhs, 2868947728 ; 0xab00ab10 ret i32 %ret } -define i32 @t2_const_var2_2_fail_3(i32 %lhs) { -;CHECK: t2_const_var2_2_fail_3: -;CHECK: movt +define i32 @t2_const_var2_2_ok_4(i32 %lhs) { +;CHECK: t2_const_var2_2_ok_4: +;CHECK: #-1426019584 +;CHECK: #1048592 %ret = add i32 %lhs, 2869996304 ; 0xab10ab10 ret i32 %ret } -define i32 @t2_const_var2_2_fail_4(i32 %lhs) { -;CHECK: t2_const_var2_2_fail_4: +define i32 @t2_const_var2_2_fail_1(i32 %lhs) { +;CHECK: t2_const_var2_2_fail_1: ;CHECK: movt %ret = add i32 %lhs, 279685904 ; 0x10abab10 ret i32 %ret @@ -125,9 +130,10 @@ define i32 @t2_const_var3_2_ok_1(i32 %lhs) { ret i32 %ret } -define i32 @t2_const_var3_2_fail_1(i32 %lhs) { -;CHECK: t2_const_var3_2_fail_1: -;CHECK: movt +define i32 @t2_const_var3_2_ok_2(i32 %lhs) { +;CHECK: t2_const_var3_2_ok_2: +;CHECK: #2097152 +;CHECK: #1843200 %ret = add i32 %lhs, 3940352 ; 0b00000000001111000010000000000000 ret i32 %ret } diff --git a/test/CodeGen/Thumb2/thumb2-mov2.ll b/test/CodeGen/Thumb2/thumb2-mov2.ll index 32d00759858..64e2ddcf3fe 100644 --- a/test/CodeGen/Thumb2/thumb2-mov2.ll +++ b/test/CodeGen/Thumb2/thumb2-mov2.ll @@ -55,10 +55,10 @@ define i32 @t2MOVTi16_test_3(i32 %a) { define i32 @t2MOVTi16_test_nomatch_1(i32 %a) { ; CHECK: t2MOVTi16_test_nomatch_1: -; CHECK: movw r1, #16384 -; CHECK-NEXT: movt r1, #154 +; CHECK: #8388608 ; CHECK: 
movw r1, #65535 ; CHECK-NEXT: movt r1, #154 +; CHECK: #1720320 %1 = shl i32 255, 8 %2 = shl i32 1234, 8 %3 = or i32 %1, 255 ; This give us 0xFFFF in %3