From 48c937e5c9808cc63d2cd04d823642d875c6bf0a Mon Sep 17 00:00:00 2001
From: Chris Lattner
Date: Tue, 6 Apr 2004 17:34:50 +0000
Subject: [PATCH] Fix a minor bug in previous checkin

Enable folding of long seteq/setne comparisons into branches and select
instructions.

Implement unfolded long relational comparisons against constants a bit
more efficiently.

Folding comparisons changes code that looks like this:

        mov %EAX, DWORD PTR [%ESP + 4]
        mov %EDX, DWORD PTR [%ESP + 8]
        mov %ECX, %EAX
        or %ECX, %EDX
        sete %CL
        test %CL, %CL
        je .LBB2 # PC rel: F

into code that looks like this:

        mov %EAX, DWORD PTR [%ESP + 4]
        mov %EDX, DWORD PTR [%ESP + 8]
        mov %ECX, %EAX
        or %ECX, %EDX
        jne .LBB2 # PC rel: F

This speeds up 186.crafty by 6% with llc-ls.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@12702 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/InstSelectSimple.cpp | 31 +++++++++++++++++++++++++++--
 lib/Target/X86/X86ISelSimple.cpp    | 31 +++++++++++++++++++++++++++--
 2 files changed, 58 insertions(+), 4 deletions(-)

diff --git a/lib/Target/X86/InstSelectSimple.cpp b/lib/Target/X86/InstSelectSimple.cpp
index 527bd1b135f..05d2d887d11 100644
--- a/lib/Target/X86/InstSelectSimple.cpp
+++ b/lib/Target/X86/InstSelectSimple.cpp
@@ -756,7 +756,9 @@ static SetCondInst *canFoldSetCCIntoBranchOrSelect(Value *V) {
       Instruction *User = cast<Instruction>(SCI->use_back());
       if ((isa<BranchInst>(User) || isa<SelectInst>(User)) &&
           SCI->getParent() == User->getParent() &&
-          getClassB(SCI->getOperand(0)->getType()) != cLong)
+          (getClassB(SCI->getOperand(0)->getType()) != cLong ||
+           SCI->getOpcode() == Instruction::SetEQ ||
+           SCI->getOpcode() == Instruction::SetNE))
         return SCI;
     }
   return 0;
@@ -846,11 +848,36 @@ unsigned ISel::EmitComparison(unsigned OpNum, Value *Op0, Value *Op1,
       unsigned HiTmp = Op0r+1;
       if (HiCst != 0) {
         HiTmp = makeAnotherReg(Type::IntTy);
-        BuildMI(*MBB, IP, X86::XOR32rr, 2,HiTmp).addReg(Op0r+1).addImm(HiCst);
+        BuildMI(*MBB, IP, X86::XOR32ri, 2,HiTmp).addReg(Op0r+1).addImm(HiCst);
       }
       unsigned FinalTmp = makeAnotherReg(Type::IntTy);
       BuildMI(*MBB, IP, X86::OR32rr, 2, FinalTmp).addReg(LoTmp).addReg(HiTmp);
       return OpNum;
+    } else {
+      // Emit a sequence of code which compares the high and low parts once
+      // each, then uses a conditional move to handle the overflow case.  For
+      // example, a setlt for long would generate code like this:
+      //
+      //  AL = lo(op1) < lo(op2)   // Signedness depends on operands
+      //  BL = hi(op1) < hi(op2)   // Always unsigned comparison
+      //  dest = hi(op1) == hi(op2) ? AL : BL;
+      //
+
+      // FIXME: This would be much better if we had hierarchical register
+      // classes!  Until then, hardcode registers so that we can deal with
+      // their aliases (because we don't have conditional byte moves).
+      //
+      BuildMI(*MBB, IP, X86::CMP32ri, 2).addReg(Op0r).addImm(LowCst);
+      BuildMI(*MBB, IP, SetCCOpcodeTab[0][OpNum], 0, X86::AL);
+      BuildMI(*MBB, IP, X86::CMP32ri, 2).addReg(Op0r+1).addImm(HiCst);
+      BuildMI(*MBB, IP, SetCCOpcodeTab[CompTy->isSigned()][OpNum], 0,X86::BL);
+      BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::BH);
+      BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::AH);
+      BuildMI(*MBB, IP, X86::CMOVE16rr, 2, X86::BX).addReg(X86::BX)
+                                                   .addReg(X86::AX);
+      // NOTE: visitSetCondInst knows that the value is dumped into the BL
+      // register at this point for long values...
+      return OpNum;
     }
   }
 }
diff --git a/lib/Target/X86/X86ISelSimple.cpp b/lib/Target/X86/X86ISelSimple.cpp
index 527bd1b135f..05d2d887d11 100644
--- a/lib/Target/X86/X86ISelSimple.cpp
+++ b/lib/Target/X86/X86ISelSimple.cpp
@@ -756,7 +756,9 @@ static SetCondInst *canFoldSetCCIntoBranchOrSelect(Value *V) {
       Instruction *User = cast<Instruction>(SCI->use_back());
       if ((isa<BranchInst>(User) || isa<SelectInst>(User)) &&
           SCI->getParent() == User->getParent() &&
-          getClassB(SCI->getOperand(0)->getType()) != cLong)
+          (getClassB(SCI->getOperand(0)->getType()) != cLong ||
+           SCI->getOpcode() == Instruction::SetEQ ||
+           SCI->getOpcode() == Instruction::SetNE))
         return SCI;
     }
   return 0;
@@ -846,11 +848,36 @@ unsigned ISel::EmitComparison(unsigned OpNum, Value *Op0, Value *Op1,
       unsigned HiTmp = Op0r+1;
       if (HiCst != 0) {
         HiTmp = makeAnotherReg(Type::IntTy);
-        BuildMI(*MBB, IP, X86::XOR32rr, 2,HiTmp).addReg(Op0r+1).addImm(HiCst);
+        BuildMI(*MBB, IP, X86::XOR32ri, 2,HiTmp).addReg(Op0r+1).addImm(HiCst);
       }
       unsigned FinalTmp = makeAnotherReg(Type::IntTy);
       BuildMI(*MBB, IP, X86::OR32rr, 2, FinalTmp).addReg(LoTmp).addReg(HiTmp);
       return OpNum;
+    } else {
+      // Emit a sequence of code which compares the high and low parts once
+      // each, then uses a conditional move to handle the overflow case.  For
+      // example, a setlt for long would generate code like this:
+      //
+      //  AL = lo(op1) < lo(op2)   // Signedness depends on operands
+      //  BL = hi(op1) < hi(op2)   // Always unsigned comparison
+      //  dest = hi(op1) == hi(op2) ? AL : BL;
+      //
+
+      // FIXME: This would be much better if we had hierarchical register
+      // classes!  Until then, hardcode registers so that we can deal with
+      // their aliases (because we don't have conditional byte moves).
+      //
+      BuildMI(*MBB, IP, X86::CMP32ri, 2).addReg(Op0r).addImm(LowCst);
+      BuildMI(*MBB, IP, SetCCOpcodeTab[0][OpNum], 0, X86::AL);
+      BuildMI(*MBB, IP, X86::CMP32ri, 2).addReg(Op0r+1).addImm(HiCst);
+      BuildMI(*MBB, IP, SetCCOpcodeTab[CompTy->isSigned()][OpNum], 0,X86::BL);
+      BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::BH);
+      BuildMI(*MBB, IP, X86::IMPLICIT_DEF, 0, X86::AH);
+      BuildMI(*MBB, IP, X86::CMOVE16rr, 2, X86::BX).addReg(X86::BX)
+                                                   .addReg(X86::AX);
+      // NOTE: visitSetCondInst knows that the value is dumped into the BL
+      // register at this point for long values...
+      return OpNum;
     }
   }
 }
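For readers without the surrounding file: the new else-branch's cmp/setcc/cmove
sequence can be read as straight C++. Below is a minimal sketch of what it
computes for a signed setlt of a long against the constant pair (HiCst, LowCst).
The function and parameter names are illustrative, not from the patch, and the
sketch follows the emitted opcodes (SetCCOpcodeTab[0][OpNum] for AL,
SetCCOpcodeTab[isSigned][OpNum] for BL):

        #include <cstdint>

        // Sketch of the lowering in the new 'else' branch, for setlt long:
        // compare the low halves unsigned (-> AL), compare the high halves
        // with the operands' signedness (-> BL), then select AL when the
        // high halves are equal.
        static bool setlt_long_const(uint32_t Lo, int32_t Hi,
                                     uint32_t LowCst, int32_t HiCst) {
          bool AL = Lo < LowCst;         // low halves: always unsigned compare
          bool BL = Hi < HiCst;          // high halves: signed for setlt
          return Hi == HiCst ? AL : BL;  // CMOVE16rr keys off the high-half ZF
        }

The move is done as a 16-bit cmove of BX from AX (with IMPLICIT_DEFs of AH and
BH so the aliased halves are defined for the allocator) purely because, as the
FIXME notes, there is no conditional byte move; downstream code still reads the
result out of BL.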