[FastISel][AArch64] Fix the immediate versions of the {s|u}{add|sub}.with.overflow intrinsics.

ADDS and SUBS cannot encode negative immediates or immediates larger than
12 bits. This fix checks whether the immediate version can be used under
these constraints, and whether we can convert ADDS to SUBS, or vice versa,
to support negative immediates.
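
To make the encoding rule concrete, here is a minimal standalone sketch of
the check this patch adds (not the LLVM code itself; fitsUImm12 is a
hypothetical stand-in for llvm::isUInt<12>, and the optional shifted form
of the real immediate encoding is ignored):

    #include <cstdint>

    // AArch64 ADDS/SUBS (immediate) take a 12-bit unsigned immediate.
    static bool fitsUImm12(uint64_t Imm) { return Imm < (1ULL << 12); }

    // For "adds dst, src, C": sets UseImm if the immediate form is usable
    // and Invert when the instruction must flip (adds -> subs) because C
    // is negative. Mirrors the UseImm/UseInverse logic in the patch.
    static void classifyAddImm(int64_t C, bool &UseImm, bool &Invert) {
      Invert = C < 0;
      // Negate through unsigned arithmetic so INT64_MIN stays defined.
      uint64_t Imm = Invert ? 0 - static_cast<uint64_t>(C)
                            : static_cast<uint64_t>(C);
      UseImm = fitsUImm12(Imm);
      Invert = UseImm && Invert; // only invert if the imm form is used
    }

For example, an add of -4 becomes "subs ..., #4", while 16777215 does not
fit in 12 bits and keeps the register form (see the updated tests below).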

Also update the test cases to test the immediate versions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@214470 91177308-0d34-0410-b5e6-96231b3b80d8
Juergen Ributzka 2014-08-01 01:25:55 +00:00
parent bcaf5e176a
commit 74ac16386b
2 changed files with 130 additions and 54 deletions

@@ -1759,59 +1759,53 @@ bool AArch64FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
     return false;
   bool LHSIsKill = hasTrivialKill(LHS);
 
-  unsigned RHSReg = 0;
-  bool RHSIsKill = false;
-  bool UseImm = true;
-  if (!isa<ConstantInt>(RHS)) {
-    RHSReg = getRegForValue(RHS);
-    if (!RHSReg)
-      return false;
-    RHSIsKill = hasTrivialKill(RHS);
-    UseImm = false;
+  // Check if the immediate can be encoded in the instruction and if we should
+  // invert the instruction (adds -> subs) to handle negative immediates.
+  bool UseImm = false;
+  bool UseInverse = false;
+  uint64_t Imm = 0;
+  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
+    if (C->isNegative()) {
+      UseInverse = true;
+      Imm = -(C->getSExtValue());
+    } else
+      Imm = C->getZExtValue();
+
+    if (isUInt<12>(Imm))
+      UseImm = true;
+
+    UseInverse = UseImm && UseInverse;
   }
 
+  static const unsigned OpcTable[2][2][2] = {
+    { {AArch64::ADDSWrr, AArch64::ADDSXrr},
+      {AArch64::ADDSWri, AArch64::ADDSXri} },
+    { {AArch64::SUBSWrr, AArch64::SUBSXrr},
+      {AArch64::SUBSWri, AArch64::SUBSXri} }
+  };
   unsigned Opc = 0;
   unsigned MulReg = 0;
+  unsigned RHSReg = 0;
+  bool RHSIsKill = false;
   AArch64CC::CondCode CC = AArch64CC::Invalid;
   bool Is64Bit = VT == MVT::i64;
   switch (II->getIntrinsicID()) {
   default: llvm_unreachable("Unexpected intrinsic!");
   case Intrinsic::sadd_with_overflow:
-    if (UseImm)
-      Opc = Is64Bit ? AArch64::ADDSXri : AArch64::ADDSWri;
-    else
-      Opc = Is64Bit ? AArch64::ADDSXrr : AArch64::ADDSWrr;
-    CC = AArch64CC::VS;
-    break;
+    Opc = OpcTable[UseInverse][UseImm][Is64Bit]; CC = AArch64CC::VS; break;
   case Intrinsic::uadd_with_overflow:
-    if (UseImm)
-      Opc = Is64Bit ? AArch64::ADDSXri : AArch64::ADDSWri;
-    else
-      Opc = Is64Bit ? AArch64::ADDSXrr : AArch64::ADDSWrr;
-    CC = AArch64CC::HS;
-    break;
+    Opc = OpcTable[UseInverse][UseImm][Is64Bit]; CC = AArch64CC::HS; break;
   case Intrinsic::ssub_with_overflow:
-    if (UseImm)
-      Opc = Is64Bit ? AArch64::SUBSXri : AArch64::SUBSWri;
-    else
-      Opc = Is64Bit ? AArch64::SUBSXrr : AArch64::SUBSWrr;
-    CC = AArch64CC::VS;
-    break;
+    Opc = OpcTable[!UseInverse][UseImm][Is64Bit]; CC = AArch64CC::VS; break;
   case Intrinsic::usub_with_overflow:
-    if (UseImm)
-      Opc = Is64Bit ? AArch64::SUBSXri : AArch64::SUBSWri;
-    else
-      Opc = Is64Bit ? AArch64::SUBSXrr : AArch64::SUBSWrr;
-    CC = AArch64CC::LO;
-    break;
+    Opc = OpcTable[!UseInverse][UseImm][Is64Bit]; CC = AArch64CC::LO; break;
   case Intrinsic::smul_with_overflow: {
     CC = AArch64CC::NE;
-    if (UseImm) {
-      RHSReg = getRegForValue(RHS);
-      if (!RHSReg)
-        return false;
-      RHSIsKill = hasTrivialKill(RHS);
-    }
+    RHSReg = getRegForValue(RHS);
+    if (!RHSReg)
+      return false;
+    RHSIsKill = hasTrivialKill(RHS);
     if (VT == MVT::i32) {
       MulReg = Emit_SMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
       unsigned ShiftReg = Emit_LSR_ri(MVT::i64, MulReg, false, 32);
@@ -1841,12 +1835,11 @@ bool AArch64FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
   }
   case Intrinsic::umul_with_overflow: {
     CC = AArch64CC::NE;
-    if (UseImm) {
-      RHSReg = getRegForValue(RHS);
-      if (!RHSReg)
-        return false;
-      RHSIsKill = hasTrivialKill(RHS);
-    }
+    RHSReg = getRegForValue(RHS);
+    if (!RHSReg)
+      return false;
+    RHSIsKill = hasTrivialKill(RHS);
     if (VT == MVT::i32) {
       MulReg = Emit_UMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
       unsigned CmpReg = createResultReg(TLI.getRegClassFor(MVT::i64));
@@ -1872,15 +1865,23 @@ bool AArch64FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
   }
   }
 
+  if (!UseImm) {
+    RHSReg = getRegForValue(RHS);
+    if (!RHSReg)
+      return false;
+    RHSIsKill = hasTrivialKill(RHS);
+  }
+
   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
   if (Opc) {
     MachineInstrBuilder MIB;
     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
                   ResultReg)
           .addReg(LHSReg, getKillRegState(LHSIsKill));
-    if (UseImm)
-      MIB.addImm(cast<ConstantInt>(RHS)->getZExtValue());
-    else
+    if (UseImm) {
+      MIB.addImm(Imm);
+      MIB.addImm(0);
+    } else
       MIB.addReg(RHSReg, getKillRegState(RHSIsKill));
   }
   else
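
For reference, a short sketch of how the OpcTable added above is indexed
(the helper and its name are illustrative, not part of the patch): the
first index selects the ADDS row (0) or SUBS row (1), the second selects
register (0) or immediate (1) variants, and the third 32-bit (0) or 64-bit
(1) opcodes. The add intrinsics index the first dimension with UseInverse
and the sub intrinsics with !UseInverse, which amounts to flipping the
natural row whenever a negative immediate forced an inversion:

    // Hypothetical helper, equivalent to the per-case table lookups above.
    unsigned pickOpcode(const unsigned OpcTable[2][2][2], bool IsSub,
                        bool UseInverse, bool UseImm, bool Is64Bit) {
      bool Row = IsSub != UseInverse; // ADDS row = 0, SUBS row = 1
      return OpcTable[Row][UseImm][Is64Bit];
    }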


@@ -4,9 +4,9 @@
 ;
 ; Get the actual value of the overflow bit.
 ;
-define zeroext i1 @saddo.i32(i32 %v1, i32 %v2, i32* %res) {
+define zeroext i1 @saddo1.i32(i32 %v1, i32 %v2, i32* %res) {
 entry:
-; CHECK-LABEL: saddo.i32
+; CHECK-LABEL: saddo1.i32
 ; CHECK: adds {{w[0-9]+}}, w0, w1
 ; CHECK-NEXT: cset {{w[0-9]+}}, vs
   %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
@@ -16,9 +16,48 @@ entry:
   ret i1 %obit
 }
 
-define zeroext i1 @saddo.i64(i64 %v1, i64 %v2, i64* %res) {
+; Test the immediate version.
+define zeroext i1 @saddo2.i32(i32 %v1, i32* %res) {
 entry:
-; CHECK-LABEL: saddo.i64
+; CHECK-LABEL: saddo2.i32
+; CHECK: adds {{w[0-9]+}}, w0, #4
+; CHECK-NEXT: cset {{w[0-9]+}}, vs
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 4)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, i32* %res
+  ret i1 %obit
+}
+
+; Test negative immediates.
+define zeroext i1 @saddo3.i32(i32 %v1, i32* %res) {
+entry:
+; CHECK-LABEL: saddo3.i32
+; CHECK: subs {{w[0-9]+}}, w0, #4
+; CHECK-NEXT: cset {{w[0-9]+}}, vs
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 -4)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, i32* %res
+  ret i1 %obit
+}
+
+; Test immediates that are too large to be encoded.
+define zeroext i1 @saddo4.i32(i32 %v1, i32* %res) {
+entry:
+; CHECK-LABEL: saddo4.i32
+; CHECK: adds {{w[0-9]+}}, w0, {{w[0-9]+}}
+; CHECK-NEXT: cset {{w[0-9]+}}, vs
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 16777215)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, i32* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @saddo1.i64(i64 %v1, i64 %v2, i64* %res) {
+entry:
+; CHECK-LABEL: saddo1.i64
 ; CHECK: adds {{x[0-9]+}}, x0, x1
 ; CHECK-NEXT: cset {{w[0-9]+}}, vs
   %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
@@ -28,6 +67,30 @@ entry:
   ret i1 %obit
 }
 
+define zeroext i1 @saddo2.i64(i64 %v1, i64* %res) {
+entry:
+; CHECK-LABEL: saddo2.i64
+; CHECK: adds {{x[0-9]+}}, x0, #4
+; CHECK-NEXT: cset {{w[0-9]+}}, vs
+  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 4)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, i64* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @saddo3.i64(i64 %v1, i64* %res) {
+entry:
+; CHECK-LABEL: saddo3.i64
+; CHECK: subs {{x[0-9]+}}, x0, #4
+; CHECK-NEXT: cset {{w[0-9]+}}, vs
+  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 -4)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, i64* %res
+  ret i1 %obit
+}
+
 define zeroext i1 @uaddo.i32(i32 %v1, i32 %v2, i32* %res) {
 entry:
 ; CHECK-LABEL: uaddo.i32
@@ -52,9 +115,9 @@ entry:
   ret i1 %obit
 }
 
-define zeroext i1 @ssubo.i32(i32 %v1, i32 %v2, i32* %res) {
+define zeroext i1 @ssubo1.i32(i32 %v1, i32 %v2, i32* %res) {
 entry:
-; CHECK-LABEL: ssubo.i32
+; CHECK-LABEL: ssubo1.i32
 ; CHECK: subs {{w[0-9]+}}, w0, w1
 ; CHECK-NEXT: cset {{w[0-9]+}}, vs
   %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
@@ -64,6 +127,18 @@ entry:
   ret i1 %obit
 }
 
+define zeroext i1 @ssubo2.i32(i32 %v1, i32* %res) {
+entry:
+; CHECK-LABEL: ssubo2.i32
+; CHECK: adds {{w[0-9]+}}, w0, #4
+; CHECK-NEXT: cset {{w[0-9]+}}, vs
+  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 -4)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, i32* %res
+  ret i1 %obit
+}
+
 define zeroext i1 @ssubo.i64(i64 %v1, i64 %v2, i64* %res) {
 entry:
 ; CHECK-LABEL: ssubo.i64