From 2bce5f4b56ac0ea8e452a79e13abba1deca9b7b6 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Wed, 28 Apr 2010 08:30:49 +0000 Subject: [PATCH] Enable i16 to i32 promotion by default. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@102493 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelDAGToDAG.cpp | 3 ++ lib/Target/X86/X86ISelLowering.cpp | 24 ++++++----- lib/Target/X86/X86Instr64bit.td | 5 +++ lib/Target/X86/X86InstrInfo.td | 13 +++--- lib/Target/X86/X86Subtarget.cpp | 6 --- lib/Target/X86/X86Subtarget.h | 5 --- test/CodeGen/X86/2008-07-11-SpillerBug.ll | 1 + test/CodeGen/X86/2008-10-16-SpillerBug.ll | 4 +- test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll | 2 +- test/CodeGen/X86/atomic_add.ll | 2 +- test/CodeGen/X86/h-registers-0.ll | 42 +++++++++++++++++-- test/CodeGen/X86/ins_subreg_coalesce-1.ll | 10 ++++- test/CodeGen/X86/promote-i16.ll | 11 +++++ test/CodeGen/X86/store-narrow.ll | 4 +- test/CodeGen/X86/tls11.ll | 4 +- test/CodeGen/X86/xor.ll | 8 ++-- 16 files changed, 98 insertions(+), 46 deletions(-) create mode 100644 test/CodeGen/X86/promote-i16.ll diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index d6744d16df3..1c4263922b3 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1854,6 +1854,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to // use a smaller encoding. + if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse()) + // Look past the truncate if CMP is the only use of it. + N0 = N0.getOperand(0); if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && N0.getValueType() != MVT::i8 && X86::isZeroNode(N1)) { diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 3d87ffb735b..4c0c5f6d432 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -6075,7 +6075,7 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, // the encoding for the i16 version is larger than the i32 version. // Also promote i16 to i32 for performance / code size reason. if (LHS.getValueType() == MVT::i8 || - (Subtarget->shouldPromote16Bit() && LHS.getValueType() == MVT::i16)) + LHS.getValueType() == MVT::i16) LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS); // If the operand types disagree, extend the shift amount to match. Since @@ -9949,7 +9949,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const { if (!isTypeLegal(VT)) return false; - if (!Subtarget->shouldPromote16Bit() || VT != MVT::i16) + if (VT != MVT::i16) return true; switch (Opc) { @@ -9983,9 +9983,6 @@ static bool MayFoldIntoStore(SDValue Op) { /// beneficial for dag combiner to promote the specified node. If true, it /// should return the desired promotion type by reference. bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const { - if (!Subtarget->shouldPromote16Bit()) - return false; - EVT VT = Op.getValueType(); if (VT != MVT::i16) return false; @@ -9998,10 +9995,16 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const { LoadSDNode *LD = cast(Op); // If the non-extending load has a single use and it's not live out, then it // might be folded. - if (LD->getExtensionType() == ISD::NON_EXTLOAD && - Op.hasOneUse() && - Op.getNode()->use_begin()->getOpcode() != ISD::CopyToReg) - return false; + if (LD->getExtensionType() == ISD::NON_EXTLOAD /*&& + Op.hasOneUse()*/) { + for (SDNode::use_iterator UI = Op.getNode()->use_begin(), + UE = Op.getNode()->use_end(); UI != UE; ++UI) { + // The only case where we'd want to promote LOAD (rather then it being + // promoted as an operand is when it's only use is liveout. + if (UI->getOpcode() != ISD::CopyToReg) + return false; + } + } Promote = true; break; } @@ -10011,8 +10014,7 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const { Promote = true; break; case ISD::SHL: - case ISD::SRL: - { + case ISD::SRL: { SDValue N0 = Op.getOperand(0); // Look out for (store (shl (load), x)). if (MayFoldLoad(N0) && MayFoldIntoStore(Op)) diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 7b1cdbcf4ac..0b5ea3f5375 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -2086,6 +2086,11 @@ def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), x86_subreg_8bit_hi))>, Requires<[In64BitMode]>; +def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)), + (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, + GR32_ABCD)), + x86_subreg_8bit_hi))>, + Requires<[In64BitMode]>; def : Pat<(srl GR16:$src, (i8 8)), (EXTRACT_SUBREG (MOVZX32_NOREXrr8 diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 751bbb4ffcd..460643a96f0 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -331,8 +331,6 @@ def OptForSpeed : Predicate<"!OptForSize">; def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">; def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">; def HasAES : Predicate<"Subtarget->hasAES()">; -def Promote16Bit : Predicate<"Subtarget->shouldPromote16Bit()">; -def NotPromote16Bit : Predicate<"!Subtarget->shouldPromote16Bit()">; //===----------------------------------------------------------------------===// // X86 Instruction Format Definitions. @@ -4450,12 +4448,10 @@ def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>; // avoid partial-register updates. def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>; def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>; -def : Pat<(i32 (anyext GR16:$src)), (MOVZX32rr16 GR16:$src)>, - Requires<[NotPromote16Bit]>; +// Except for i16 -> i32 since isel expect i16 ops to be promoted to i32. def : Pat<(i32 (anyext GR16:$src)), - (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>, - Requires<[Promote16Bit]>; + (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>; //===----------------------------------------------------------------------===// @@ -4546,6 +4542,11 @@ def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), GR32_ABCD)), x86_subreg_8bit_hi))>, Requires<[In32BitMode]>; +def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)), + (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, + GR32_ABCD)), + x86_subreg_8bit_hi))>, + Requires<[In32BitMode]>; // (shl x, 1) ==> (add x, x) def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>; diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 7ac7b76785c..09a26858eb7 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -16,7 +16,6 @@ #include "X86InstrInfo.h" #include "X86GenSubtarget.inc" #include "llvm/GlobalValue.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/System/Host.h" @@ -25,10 +24,6 @@ #include "llvm/ADT/SmallVector.h" using namespace llvm; -static cl::opt -DoPromote16Bit("promote-16bit", cl::Hidden, - cl::desc("Promote 16-bit instructions")); - #if defined(_MSC_VER) #include #endif @@ -298,7 +293,6 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, , IsBTMemSlow(false) , IsUAMemFast(false) , HasVectorUAMem(false) - , Promote16Bit(DoPromote16Bit) , DarwinVers(0) , stackAlignment(8) // FIXME: this is a known good value for Yonah. How about others? diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index dc990189045..646af91517f 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -88,10 +88,6 @@ protected: /// operands. This may require setting a feature bit in the processor. bool HasVectorUAMem; - /// Promote16Bit - True if codegen should promote 16-bit operations to 32-bit. - /// This is a temporary option. - bool Promote16Bit; - /// DarwinVers - Nonzero if this is a darwin platform: the numeric /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc. unsigned char DarwinVers; // Is any darwin-x86 platform. @@ -160,7 +156,6 @@ public: bool isBTMemSlow() const { return IsBTMemSlow; } bool isUnalignedMemAccessFast() const { return IsUAMemFast; } bool hasVectorUAMem() const { return HasVectorUAMem; } - bool shouldPromote16Bit() const { return Promote16Bit; } bool isTargetDarwin() const { return TargetType == isDarwin; } bool isTargetELF() const { return TargetType == isELF; } diff --git a/test/CodeGen/X86/2008-07-11-SpillerBug.ll b/test/CodeGen/X86/2008-07-11-SpillerBug.ll index 548b44db6d2..d0023b28c6d 100644 --- a/test/CodeGen/X86/2008-07-11-SpillerBug.ll +++ b/test/CodeGen/X86/2008-07-11-SpillerBug.ll @@ -3,6 +3,7 @@ ; CHECK: andl $65534, % ; CHECK-NEXT: movl % +; CHECK-NEXT: movzwl ; CHECK-NEXT: movl $17 @g_5 = external global i16 ; [#uses=2] diff --git a/test/CodeGen/X86/2008-10-16-SpillerBug.ll b/test/CodeGen/X86/2008-10-16-SpillerBug.ll index f811230ce43..87305a0b311 100644 --- a/test/CodeGen/X86/2008-10-16-SpillerBug.ll +++ b/test/CodeGen/X86/2008-10-16-SpillerBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin -stats |& grep asm-printer | grep 40 +; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin -stats |& grep asm-printer | grep 41 ; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin | FileCheck %s %struct.XXDActiveTextureTargets = type { i64, i64, i64, i64, i64, i64 } @@ -63,13 +63,13 @@ define void @t(%struct.XXDState* %gldst, <4 x float>* %prgrm, <4 x float>** %buffs, %struct._XXVMConstants* %cnstn, %struct.YYToken* %pstrm, %struct.XXVMVPContext* %vmctx, %struct.XXVMTextures* %txtrs, %struct.XXVMVPStack* %vpstk, <4 x float>* %atr0, <4 x float>* %atr1, <4 x float>* %atr2, <4 x float>* %atr3, <4 x float>* %vtx0, <4 x float>* %vtx1, <4 x float>* %vtx2, <4 x float>* %vtx3, [4 x <4 x float>]* %tmpGbl, i32* %oldMsk, <4 x i32>* %adrGbl, i64 %key_token) nounwind { entry: ; CHECK: t: -; CHECK: xorl %ecx, %ecx %0 = trunc i64 %key_token to i32 ; [#uses=1] %1 = getelementptr %struct.YYToken* %pstrm, i32 %0 ; <%struct.YYToken*> [#uses=5] br label %bb1132 bb51: ; preds = %bb1132 ; CHECK: .align 4 +; CHECK: xorl %ecx, %ecx ; CHECK: andl $7 %2 = getelementptr %struct.YYToken* %1, i32 %operation.0.rec, i32 0, i32 0 ; [#uses=1] %3 = load i16* %2, align 1 ; [#uses=3] diff --git a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll index abbe97ac193..69f644f5834 100644 --- a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll +++ b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll @@ -4,7 +4,7 @@ ; rdar://6808032 ; CHECK: pextrw $14 -; CHECK-NEXT: movzbl +; CHECK-NEXT: shrl $8 ; CHECK-NEXT: (%ebp) ; CHECK-NEXT: pinsrw diff --git a/test/CodeGen/X86/atomic_add.ll b/test/CodeGen/X86/atomic_add.ll index d00f8e861c2..26d25e24dfb 100644 --- a/test/CodeGen/X86/atomic_add.ll +++ b/test/CodeGen/X86/atomic_add.ll @@ -192,7 +192,7 @@ entry: define void @sub2(i16* nocapture %p, i32 %v) nounwind ssp { entry: ; CHECK: sub2: -; CHECK: subw +; CHECK: negl %0 = trunc i32 %v to i16 ; [#uses=1] %1 = tail call i16 @llvm.atomic.load.sub.i16.p0i16(i16* %p, i16 %0) ; [#uses=0] ret void diff --git a/test/CodeGen/X86/h-registers-0.ll b/test/CodeGen/X86/h-registers-0.ll index 878fd93b737..e84bb9a34a2 100644 --- a/test/CodeGen/X86/h-registers-0.ll +++ b/test/CodeGen/X86/h-registers-0.ll @@ -1,12 +1,16 @@ -; RUN: llc < %s -march=x86-64 | grep {movzbl %\[abcd\]h,} | count 4 -; RUN: llc < %s -march=x86 > %t -; RUN: grep {incb %ah} %t | count 3 -; RUN: grep {movzbl %ah,} %t | count 3 +; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=X86-64 +; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X86-32 ; Use h registers. On x86-64, codegen doesn't support general allocation ; of h registers yet, due to x86 encoding complications. define void @bar64(i64 inreg %x, i8* inreg %p) nounwind { +; X86-64: bar64: +; X86-64: shrq $8, %rdi +; X86-64: incb %dil + +; X86-32: bar64: +; X86-32: incb %ah %t0 = lshr i64 %x, 8 %t1 = trunc i64 %t0 to i8 %t2 = add i8 %t1, 1 @@ -15,6 +19,12 @@ define void @bar64(i64 inreg %x, i8* inreg %p) nounwind { } define void @bar32(i32 inreg %x, i8* inreg %p) nounwind { +; X86-64: bar32: +; X86-64: shrl $8, %edi +; X86-64: incb %dil + +; X86-32: bar32: +; X86-32: incb %ah %t0 = lshr i32 %x, 8 %t1 = trunc i32 %t0 to i8 %t2 = add i8 %t1, 1 @@ -23,6 +33,12 @@ define void @bar32(i32 inreg %x, i8* inreg %p) nounwind { } define void @bar16(i16 inreg %x, i8* inreg %p) nounwind { +; X86-64: bar16: +; X86-64: shrl $8, %edi +; X86-64: incb %dil + +; X86-32: bar16: +; X86-32: incb %ah %t0 = lshr i16 %x, 8 %t1 = trunc i16 %t0 to i8 %t2 = add i8 %t1, 1 @@ -31,18 +47,36 @@ define void @bar16(i16 inreg %x, i8* inreg %p) nounwind { } define i64 @qux64(i64 inreg %x) nounwind { +; X86-64: qux64: +; X86-64: movq %rdi, %rax +; X86-64: movzbl %ah, %eax + +; X86-32: qux64: +; X86-32: movzbl %ah, %eax %t0 = lshr i64 %x, 8 %t1 = and i64 %t0, 255 ret i64 %t1 } define i32 @qux32(i32 inreg %x) nounwind { +; X86-64: qux32: +; X86-64: movl %edi, %eax +; X86-64: movzbl %ah, %eax + +; X86-32: qux32: +; X86-32: movzbl %ah, %eax %t0 = lshr i32 %x, 8 %t1 = and i32 %t0, 255 ret i32 %t1 } define i16 @qux16(i16 inreg %x) nounwind { +; X86-64: qux16: +; X86-64: movl %edi, %eax +; X86-64: movzbl %ah, %eax + +; X86-32: qux16: +; X86-32: movzbl %ah, %eax %t0 = lshr i16 %x, 8 ret i16 %t0 } diff --git a/test/CodeGen/X86/ins_subreg_coalesce-1.ll b/test/CodeGen/X86/ins_subreg_coalesce-1.ll index 2243f93f3dd..83674361a77 100644 --- a/test/CodeGen/X86/ins_subreg_coalesce-1.ll +++ b/test/CodeGen/X86/ins_subreg_coalesce-1.ll @@ -1,7 +1,13 @@ -; RUN: llc < %s -march=x86 | grep mov | count 3 +; RUN: llc < %s -march=x86 | FileCheck %s -define fastcc i32 @sqlite3ExprResolveNames() nounwind { +define fastcc i32 @t() nounwind { entry: +; CHECK: t: +; CHECK: movzwl 0, %eax +; CHECK: orl $2, %eax +; CHECK: movw %ax, 0 +; CHECK: shrl $3, %eax +; CHECK: andl $1, %eax br i1 false, label %UnifiedReturnBlock, label %bb4 bb4: ; preds = %entry br i1 false, label %bb17, label %bb22 diff --git a/test/CodeGen/X86/promote-i16.ll b/test/CodeGen/X86/promote-i16.ll new file mode 100644 index 00000000000..101bb29593c --- /dev/null +++ b/test/CodeGen/X86/promote-i16.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s -march=x86 | FileCheck %s + +define signext i16 @foo(i16 signext %x) nounwind { +entry: +; CHECK: foo: +; CHECK: movzwl 4(%esp), %eax +; CHECK: xorl $21998, %eax +; CHECK: movswl %ax, %eax + %0 = xor i16 %x, 21998 + ret i16 %0 +} diff --git a/test/CodeGen/X86/store-narrow.ll b/test/CodeGen/X86/store-narrow.ll index a6566979e47..b1100fa960c 100644 --- a/test/CodeGen/X86/store-narrow.ll +++ b/test/CodeGen/X86/store-narrow.ll @@ -67,7 +67,7 @@ entry: ; X64: movw %si, 2(%rdi) ; X32: test4: -; X32: movw 8(%esp), %ax +; X32: movzwl 8(%esp), %eax ; X32: movw %ax, 2(%{{.*}}) } @@ -84,7 +84,7 @@ entry: ; X64: movw %si, 2(%rdi) ; X32: test5: -; X32: movw 8(%esp), %ax +; X32: movzwl 8(%esp), %eax ; X32: movw %ax, 2(%{{.*}}) } diff --git a/test/CodeGen/X86/tls11.ll b/test/CodeGen/X86/tls11.ll index a2c1a1f75de..514a168c538 100644 --- a/test/CodeGen/X86/tls11.ll +++ b/test/CodeGen/X86/tls11.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t -; RUN: grep {movw %gs:i@NTPOFF, %ax} %t +; RUN: grep {movzwl %gs:i@NTPOFF, %eax} %t ; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2 -; RUN: grep {movw %fs:i@TPOFF, %ax} %t2 +; RUN: grep {movzwl %fs:i@TPOFF, %eax} %t2 @i = thread_local global i16 15 diff --git a/test/CodeGen/X86/xor.ll b/test/CodeGen/X86/xor.ll index f270d9d56e2..6c623cb1553 100644 --- a/test/CodeGen/X86/xor.ll +++ b/test/CodeGen/X86/xor.ll @@ -80,11 +80,11 @@ bb: bb12: ret i16 %tmp3 ; X64: test5: -; X64: notw [[REG:%[a-z]+]] -; X64: andw {{.*}}[[REG]] +; X64: notl [[REG:%[a-z]+]] +; X64: andl {{.*}}[[REG]] ; X32: test5: -; X32: notw [[REG:%[a-z]+]] -; X32: andw {{.*}}[[REG]] +; X32: notl [[REG:%[a-z]+]] +; X32: andl {{.*}}[[REG]] } define i8 @test6(i8 %a, i8 %b) nounwind {