[FastIsel][X86] Fix invalid register replacement for bool args

Summary:
Consider the following IR:

  %3 = load i8* undef
  %4 = trunc i8 %3 to i1
  %5 = call %jl_value_t.0* @foo(..., i1 %4, ...)
  ret %jl_value_t.0* %5

Bools (that are the result of direct truncs) are lowered as whatever
the argument to the trunc was, followed by an "and 1", causing the part
of the MBB responsible for this argument to look something like this:

  %vreg8<def,tied1> = AND8ri %vreg7<kill,tied0>, 1, %EFLAGS<imp-def>; GR8:%vreg8,%vreg7

Later, when the load is lowered, it will insert

  %vreg15<def> = MOV8rm %vreg14, 1, %noreg, 0, %noreg; mem:LD1[undef] GR8:%vreg15 GR64:%vreg14

but remember to (at the end of isel) replace vreg7 by vreg15. Now for
the bug. In fast isel lowering, we mistakenly mark vreg8 as the result
of the load instead of the trunc. This adds a fixup to have
vreg8 replaced by whatever the result of the load is as well, so
we end up with

  %vreg15<def,tied1> = AND8ri %vreg15<kill,tied0>, 1, %EFLAGS<imp-def>; GR8:%vreg15

which is an SSA violation and causes problems later down the road.

This fixes PR21557.

Test Plan: The test case from PR21557 is added to the test suite.

Reviewers: ributzka

Reviewed By: ributzka

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D6245

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@224884 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Keno Fischer 2014-12-27 13:10:15 +00:00
parent aceb47b808
commit cc80af1b4f
2 changed files with 46 additions and 27 deletions

View File

@ -2672,6 +2672,9 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
TM.Options.GuaranteedTailCallOpt))
return false;
SmallVector<MVT, 16> OutVTs;
SmallVector<unsigned, 16> ArgRegs;
// If this is a constant i1/i8/i16 argument, promote to i32 to avoid an extra
// instruction. This is safe because it is common to all FastISel supported
// calling conventions on x86.
@ -2689,28 +2692,34 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
// Passing bools around ends up doing a trunc to i1 and passing it.
// Codegen this as an argument + "and 1".
if (auto *TI = dyn_cast<TruncInst>(Val)) {
if (TI->getType()->isIntegerTy(1) && CLI.CS &&
(TI->getParent() == CLI.CS->getInstruction()->getParent()) &&
TI->hasOneUse()) {
Val = cast<TruncInst>(Val)->getOperand(0);
unsigned ResultReg = getRegForValue(Val);
MVT VT;
auto *TI = dyn_cast<TruncInst>(Val);
unsigned ResultReg;
if (TI && TI->getType()->isIntegerTy(1) && CLI.CS &&
(TI->getParent() == CLI.CS->getInstruction()->getParent()) &&
TI->hasOneUse()) {
Value *PrevVal = TI->getOperand(0);
ResultReg = getRegForValue(PrevVal);
if (!ResultReg)
return false;
if (!ResultReg)
return false;
MVT ArgVT;
if (!isTypeLegal(Val->getType(), ArgVT))
return false;
if (!isTypeLegal(PrevVal->getType(), VT))
return false;
ResultReg =
fastEmit_ri(ArgVT, ArgVT, ISD::AND, ResultReg, Val->hasOneUse(), 1);
ResultReg =
fastEmit_ri(VT, VT, ISD::AND, ResultReg, hasTrivialKill(PrevVal), 1);
if (!ResultReg)
return false;
updateValueMap(Val, ResultReg);
}
if (!ResultReg)
return false;
} else {
if (!isTypeLegal(Val->getType(), VT))
return false;
ResultReg = getRegForValue(Val);
}
ArgRegs.push_back(ResultReg);
OutVTs.push_back(VT);
}
// Analyze operands of the call, assigning locations to each operand.
@ -2721,13 +2730,6 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
if (IsWin64)
CCInfo.AllocateStack(32, 8);
SmallVector<MVT, 16> OutVTs;
for (auto *Val : OutVals) {
MVT VT;
if (!isTypeLegal(Val->getType(), VT))
return false;
OutVTs.push_back(VT);
}
CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);
// Get a count of how many bytes are to be pushed on the stack.
@ -2749,9 +2751,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
if (ArgVT == MVT::x86mmx)
return false;
unsigned ArgReg = getRegForValue(ArgVal);
if (!ArgReg)
return false;
unsigned ArgReg = ArgRegs[VA.getValNo()];
// Promote the value if needed.
switch (VA.getLocInfo()) {

View File

@ -0,0 +1,19 @@
; RUN: llc < %s -fast-isel -mcpu=core2 -O1 | FileCheck %s
; See PR21557
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-darwin14.0.0"
declare i64 @bar(i1)
; Regression test for PR21557: an i8 load whose only use is a trunc to i1
; that is passed directly as a call argument in the same basic block.
; The directives below expect, in order, the byte load (movb), the masking
; of the truncated value with 1 (andb $1), and finally the call (callq).
define i64 @foo(i8* %arg) {
; CHECK-LABEL: foo:
top:
%0 = load i8* %arg
; CHECK: movb
%1 = trunc i8 %0 to i1
; CHECK: andb $1,
%2 = call i64 @bar(i1 %1)
; CHECK: callq
ret i64 %2
}