mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-12 13:30:51 +00:00
Enable i16 to i32 promotion by default.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@102493 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
b3a3d5e858
commit
2bce5f4b56
@ -1854,6 +1854,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
|
||||
|
||||
// Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
|
||||
// use a smaller encoding.
|
||||
if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse())
|
||||
// Look past the truncate if CMP is the only use of it.
|
||||
N0 = N0.getOperand(0);
|
||||
if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
|
||||
N0.getValueType() != MVT::i8 &&
|
||||
X86::isZeroNode(N1)) {
|
||||
|
@ -6075,7 +6075,7 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
|
||||
// the encoding for the i16 version is larger than the i32 version.
|
||||
// Also promote i16 to i32 for performance / code size reasons.
|
||||
if (LHS.getValueType() == MVT::i8 ||
|
||||
(Subtarget->shouldPromote16Bit() && LHS.getValueType() == MVT::i16))
|
||||
LHS.getValueType() == MVT::i16)
|
||||
LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
|
||||
|
||||
// If the operand types disagree, extend the shift amount to match. Since
|
||||
@ -9949,7 +9949,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
||||
bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
|
||||
if (!isTypeLegal(VT))
|
||||
return false;
|
||||
if (!Subtarget->shouldPromote16Bit() || VT != MVT::i16)
|
||||
if (VT != MVT::i16)
|
||||
return true;
|
||||
|
||||
switch (Opc) {
|
||||
@ -9983,9 +9983,6 @@ static bool MayFoldIntoStore(SDValue Op) {
|
||||
/// beneficial for dag combiner to promote the specified node. If true, it
|
||||
/// should return the desired promotion type by reference.
|
||||
bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
|
||||
if (!Subtarget->shouldPromote16Bit())
|
||||
return false;
|
||||
|
||||
EVT VT = Op.getValueType();
|
||||
if (VT != MVT::i16)
|
||||
return false;
|
||||
@ -9998,10 +9995,16 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
|
||||
LoadSDNode *LD = cast<LoadSDNode>(Op);
|
||||
// If the non-extending load has a single use and it's not live out, then it
|
||||
// might be folded.
|
||||
if (LD->getExtensionType() == ISD::NON_EXTLOAD &&
|
||||
Op.hasOneUse() &&
|
||||
Op.getNode()->use_begin()->getOpcode() != ISD::CopyToReg)
|
||||
return false;
|
||||
if (LD->getExtensionType() == ISD::NON_EXTLOAD /*&&
|
||||
Op.hasOneUse()*/) {
|
||||
for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
|
||||
UE = Op.getNode()->use_end(); UI != UE; ++UI) {
|
||||
// The only case where we'd want to promote LOAD (rather than it being
|
||||
// promoted as an operand) is when its only use is liveout.
|
||||
if (UI->getOpcode() != ISD::CopyToReg)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
Promote = true;
|
||||
break;
|
||||
}
|
||||
@ -10011,8 +10014,7 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
|
||||
Promote = true;
|
||||
break;
|
||||
case ISD::SHL:
|
||||
case ISD::SRL:
|
||||
{
|
||||
case ISD::SRL: {
|
||||
SDValue N0 = Op.getOperand(0);
|
||||
// Look out for (store (shl (load), x)).
|
||||
if (MayFoldLoad(N0) && MayFoldIntoStore(Op))
|
||||
|
@ -2086,6 +2086,11 @@ def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
|
||||
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
|
||||
x86_subreg_8bit_hi))>,
|
||||
Requires<[In64BitMode]>;
|
||||
def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
|
||||
(MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
|
||||
GR32_ABCD)),
|
||||
x86_subreg_8bit_hi))>,
|
||||
Requires<[In64BitMode]>;
|
||||
def : Pat<(srl GR16:$src, (i8 8)),
|
||||
(EXTRACT_SUBREG
|
||||
(MOVZX32_NOREXrr8
|
||||
|
@ -331,8 +331,6 @@ def OptForSpeed : Predicate<"!OptForSize">;
|
||||
def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
|
||||
def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
|
||||
def HasAES : Predicate<"Subtarget->hasAES()">;
|
||||
def Promote16Bit : Predicate<"Subtarget->shouldPromote16Bit()">;
|
||||
def NotPromote16Bit : Predicate<"!Subtarget->shouldPromote16Bit()">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// X86 Instruction Format Definitions.
|
||||
@ -4450,12 +4448,10 @@ def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
|
||||
// avoid partial-register updates.
|
||||
def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>;
|
||||
def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;
|
||||
def : Pat<(i32 (anyext GR16:$src)), (MOVZX32rr16 GR16:$src)>,
|
||||
Requires<[NotPromote16Bit]>;
|
||||
|
||||
// Except for i16 -> i32 since isel expects i16 ops to be promoted to i32.
|
||||
def : Pat<(i32 (anyext GR16:$src)),
|
||||
(INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>,
|
||||
Requires<[Promote16Bit]>;
|
||||
(INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -4546,6 +4542,11 @@ def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
|
||||
GR32_ABCD)),
|
||||
x86_subreg_8bit_hi))>,
|
||||
Requires<[In32BitMode]>;
|
||||
def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
|
||||
(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
|
||||
GR32_ABCD)),
|
||||
x86_subreg_8bit_hi))>,
|
||||
Requires<[In32BitMode]>;
|
||||
|
||||
// (shl x, 1) ==> (add x, x)
|
||||
def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;
|
||||
|
@ -16,7 +16,6 @@
|
||||
#include "X86InstrInfo.h"
|
||||
#include "X86GenSubtarget.inc"
|
||||
#include "llvm/GlobalValue.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/System/Host.h"
|
||||
@ -25,10 +24,6 @@
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
using namespace llvm;
|
||||
|
||||
static cl::opt<bool>
|
||||
DoPromote16Bit("promote-16bit", cl::Hidden,
|
||||
cl::desc("Promote 16-bit instructions"));
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
@ -298,7 +293,6 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
|
||||
, IsBTMemSlow(false)
|
||||
, IsUAMemFast(false)
|
||||
, HasVectorUAMem(false)
|
||||
, Promote16Bit(DoPromote16Bit)
|
||||
, DarwinVers(0)
|
||||
, stackAlignment(8)
|
||||
// FIXME: this is a known good value for Yonah. How about others?
|
||||
|
@ -88,10 +88,6 @@ protected:
|
||||
/// operands. This may require setting a feature bit in the processor.
|
||||
bool HasVectorUAMem;
|
||||
|
||||
/// Promote16Bit - True if codegen should promote 16-bit operations to 32-bit.
|
||||
/// This is a temporary option.
|
||||
bool Promote16Bit;
|
||||
|
||||
/// DarwinVers - Nonzero if this is a darwin platform: the numeric
|
||||
/// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.
|
||||
unsigned char DarwinVers; // Is any darwin-x86 platform.
|
||||
@ -160,7 +156,6 @@ public:
|
||||
bool isBTMemSlow() const { return IsBTMemSlow; }
|
||||
bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
|
||||
bool hasVectorUAMem() const { return HasVectorUAMem; }
|
||||
bool shouldPromote16Bit() const { return Promote16Bit; }
|
||||
|
||||
bool isTargetDarwin() const { return TargetType == isDarwin; }
|
||||
bool isTargetELF() const { return TargetType == isELF; }
|
||||
|
@ -3,6 +3,7 @@
|
||||
|
||||
; CHECK: andl $65534, %
|
||||
; CHECK-NEXT: movl %
|
||||
; CHECK-NEXT: movzwl
|
||||
; CHECK-NEXT: movl $17
|
||||
|
||||
@g_5 = external global i16 ; <i16*> [#uses=2]
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin -stats |& grep asm-printer | grep 40
|
||||
; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin -stats |& grep asm-printer | grep 41
|
||||
; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin | FileCheck %s
|
||||
|
||||
%struct.XXDActiveTextureTargets = type { i64, i64, i64, i64, i64, i64 }
|
||||
@ -63,13 +63,13 @@
|
||||
define void @t(%struct.XXDState* %gldst, <4 x float>* %prgrm, <4 x float>** %buffs, %struct._XXVMConstants* %cnstn, %struct.YYToken* %pstrm, %struct.XXVMVPContext* %vmctx, %struct.XXVMTextures* %txtrs, %struct.XXVMVPStack* %vpstk, <4 x float>* %atr0, <4 x float>* %atr1, <4 x float>* %atr2, <4 x float>* %atr3, <4 x float>* %vtx0, <4 x float>* %vtx1, <4 x float>* %vtx2, <4 x float>* %vtx3, [4 x <4 x float>]* %tmpGbl, i32* %oldMsk, <4 x i32>* %adrGbl, i64 %key_token) nounwind {
|
||||
entry:
|
||||
; CHECK: t:
|
||||
; CHECK: xorl %ecx, %ecx
|
||||
%0 = trunc i64 %key_token to i32 ; <i32> [#uses=1]
|
||||
%1 = getelementptr %struct.YYToken* %pstrm, i32 %0 ; <%struct.YYToken*> [#uses=5]
|
||||
br label %bb1132
|
||||
|
||||
bb51: ; preds = %bb1132
|
||||
; CHECK: .align 4
|
||||
; CHECK: xorl %ecx, %ecx
|
||||
; CHECK: andl $7
|
||||
%2 = getelementptr %struct.YYToken* %1, i32 %operation.0.rec, i32 0, i32 0 ; <i16*> [#uses=1]
|
||||
%3 = load i16* %2, align 1 ; <i16> [#uses=3]
|
||||
|
@ -4,7 +4,7 @@
|
||||
; rdar://6808032
|
||||
|
||||
; CHECK: pextrw $14
|
||||
; CHECK-NEXT: movzbl
|
||||
; CHECK-NEXT: shrl $8
|
||||
; CHECK-NEXT: (%ebp)
|
||||
; CHECK-NEXT: pinsrw
|
||||
|
||||
|
@ -192,7 +192,7 @@ entry:
|
||||
define void @sub2(i16* nocapture %p, i32 %v) nounwind ssp {
|
||||
entry:
|
||||
; CHECK: sub2:
|
||||
; CHECK: subw
|
||||
; CHECK: negl
|
||||
%0 = trunc i32 %v to i16 ; <i16> [#uses=1]
|
||||
%1 = tail call i16 @llvm.atomic.load.sub.i16.p0i16(i16* %p, i16 %0) ; <i16> [#uses=0]
|
||||
ret void
|
||||
|
@ -1,12 +1,16 @@
|
||||
; RUN: llc < %s -march=x86-64 | grep {movzbl %\[abcd\]h,} | count 4
|
||||
; RUN: llc < %s -march=x86 > %t
|
||||
; RUN: grep {incb %ah} %t | count 3
|
||||
; RUN: grep {movzbl %ah,} %t | count 3
|
||||
; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=X86-64
|
||||
; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X86-32
|
||||
|
||||
; Use h registers. On x86-64, codegen doesn't support general allocation
|
||||
; of h registers yet, due to x86 encoding complications.
|
||||
|
||||
define void @bar64(i64 inreg %x, i8* inreg %p) nounwind {
|
||||
; X86-64: bar64:
|
||||
; X86-64: shrq $8, %rdi
|
||||
; X86-64: incb %dil
|
||||
|
||||
; X86-32: bar64:
|
||||
; X86-32: incb %ah
|
||||
%t0 = lshr i64 %x, 8
|
||||
%t1 = trunc i64 %t0 to i8
|
||||
%t2 = add i8 %t1, 1
|
||||
@ -15,6 +19,12 @@ define void @bar64(i64 inreg %x, i8* inreg %p) nounwind {
|
||||
}
|
||||
|
||||
define void @bar32(i32 inreg %x, i8* inreg %p) nounwind {
|
||||
; X86-64: bar32:
|
||||
; X86-64: shrl $8, %edi
|
||||
; X86-64: incb %dil
|
||||
|
||||
; X86-32: bar32:
|
||||
; X86-32: incb %ah
|
||||
%t0 = lshr i32 %x, 8
|
||||
%t1 = trunc i32 %t0 to i8
|
||||
%t2 = add i8 %t1, 1
|
||||
@ -23,6 +33,12 @@ define void @bar32(i32 inreg %x, i8* inreg %p) nounwind {
|
||||
}
|
||||
|
||||
define void @bar16(i16 inreg %x, i8* inreg %p) nounwind {
|
||||
; X86-64: bar16:
|
||||
; X86-64: shrl $8, %edi
|
||||
; X86-64: incb %dil
|
||||
|
||||
; X86-32: bar16:
|
||||
; X86-32: incb %ah
|
||||
%t0 = lshr i16 %x, 8
|
||||
%t1 = trunc i16 %t0 to i8
|
||||
%t2 = add i8 %t1, 1
|
||||
@ -31,18 +47,36 @@ define void @bar16(i16 inreg %x, i8* inreg %p) nounwind {
|
||||
}
|
||||
|
||||
define i64 @qux64(i64 inreg %x) nounwind {
|
||||
; X86-64: qux64:
|
||||
; X86-64: movq %rdi, %rax
|
||||
; X86-64: movzbl %ah, %eax
|
||||
|
||||
; X86-32: qux64:
|
||||
; X86-32: movzbl %ah, %eax
|
||||
%t0 = lshr i64 %x, 8
|
||||
%t1 = and i64 %t0, 255
|
||||
ret i64 %t1
|
||||
}
|
||||
|
||||
define i32 @qux32(i32 inreg %x) nounwind {
|
||||
; X86-64: qux32:
|
||||
; X86-64: movl %edi, %eax
|
||||
; X86-64: movzbl %ah, %eax
|
||||
|
||||
; X86-32: qux32:
|
||||
; X86-32: movzbl %ah, %eax
|
||||
%t0 = lshr i32 %x, 8
|
||||
%t1 = and i32 %t0, 255
|
||||
ret i32 %t1
|
||||
}
|
||||
|
||||
define i16 @qux16(i16 inreg %x) nounwind {
|
||||
; X86-64: qux16:
|
||||
; X86-64: movl %edi, %eax
|
||||
; X86-64: movzbl %ah, %eax
|
||||
|
||||
; X86-32: qux16:
|
||||
; X86-32: movzbl %ah, %eax
|
||||
%t0 = lshr i16 %x, 8
|
||||
ret i16 %t0
|
||||
}
|
||||
|
@ -1,7 +1,13 @@
|
||||
; RUN: llc < %s -march=x86 | grep mov | count 3
|
||||
; RUN: llc < %s -march=x86 | FileCheck %s
|
||||
|
||||
define fastcc i32 @sqlite3ExprResolveNames() nounwind {
|
||||
define fastcc i32 @t() nounwind {
|
||||
entry:
|
||||
; CHECK: t:
|
||||
; CHECK: movzwl 0, %eax
|
||||
; CHECK: orl $2, %eax
|
||||
; CHECK: movw %ax, 0
|
||||
; CHECK: shrl $3, %eax
|
||||
; CHECK: andl $1, %eax
|
||||
br i1 false, label %UnifiedReturnBlock, label %bb4
|
||||
bb4: ; preds = %entry
|
||||
br i1 false, label %bb17, label %bb22
|
||||
|
11
test/CodeGen/X86/promote-i16.ll
Normal file
11
test/CodeGen/X86/promote-i16.ll
Normal file
@ -0,0 +1,11 @@
|
||||
; RUN: llc < %s -march=x86 | FileCheck %s
|
||||
|
||||
define signext i16 @foo(i16 signext %x) nounwind {
|
||||
entry:
|
||||
; CHECK: foo:
|
||||
; CHECK: movzwl 4(%esp), %eax
|
||||
; CHECK: xorl $21998, %eax
|
||||
; CHECK: movswl %ax, %eax
|
||||
%0 = xor i16 %x, 21998
|
||||
ret i16 %0
|
||||
}
|
@ -67,7 +67,7 @@ entry:
|
||||
; X64: movw %si, 2(%rdi)
|
||||
|
||||
; X32: test4:
|
||||
; X32: movw 8(%esp), %ax
|
||||
; X32: movzwl 8(%esp), %eax
|
||||
; X32: movw %ax, 2(%{{.*}})
|
||||
}
|
||||
|
||||
@ -84,7 +84,7 @@ entry:
|
||||
; X64: movw %si, 2(%rdi)
|
||||
|
||||
; X32: test5:
|
||||
; X32: movw 8(%esp), %ax
|
||||
; X32: movzwl 8(%esp), %eax
|
||||
; X32: movw %ax, 2(%{{.*}})
|
||||
}
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
|
||||
; RUN: grep {movw %gs:i@NTPOFF, %ax} %t
|
||||
; RUN: grep {movzwl %gs:i@NTPOFF, %eax} %t
|
||||
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
|
||||
; RUN: grep {movw %fs:i@TPOFF, %ax} %t2
|
||||
; RUN: grep {movzwl %fs:i@TPOFF, %eax} %t2
|
||||
|
||||
@i = thread_local global i16 15
|
||||
|
||||
|
@ -80,11 +80,11 @@ bb:
|
||||
bb12:
|
||||
ret i16 %tmp3
|
||||
; X64: test5:
|
||||
; X64: notw [[REG:%[a-z]+]]
|
||||
; X64: andw {{.*}}[[REG]]
|
||||
; X64: notl [[REG:%[a-z]+]]
|
||||
; X64: andl {{.*}}[[REG]]
|
||||
; X32: test5:
|
||||
; X32: notw [[REG:%[a-z]+]]
|
||||
; X32: andw {{.*}}[[REG]]
|
||||
; X32: notl [[REG:%[a-z]+]]
|
||||
; X32: andl {{.*}}[[REG]]
|
||||
}
|
||||
|
||||
define i8 @test6(i8 %a, i8 %b) nounwind {
|
||||
|
Loading…
Reference in New Issue
Block a user