mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-07-24 06:25:18 +00:00
Fix previous commit: [FastISel][AArch64] Simplify XALU multiplies.
When folding the intrinsic flag into the branch or select, we also have to consider whether the intrinsic got simplified, because the simplification changes the flag we have to check for.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218034 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -2833,15 +2833,49 @@ bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
|
|||||||
if (RetVT != MVT::i32 && RetVT != MVT::i64)
|
if (RetVT != MVT::i32 && RetVT != MVT::i64)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
AArch64CC::CondCode TmpCC;
|
const Value *LHS = II->getArgOperand(0);
|
||||||
switch (II->getIntrinsicID()) {
|
const Value *RHS = II->getArgOperand(1);
|
||||||
default: return false;
|
|
||||||
case Intrinsic::sadd_with_overflow:
|
// Canonicalize immediate to the RHS.
|
||||||
case Intrinsic::ssub_with_overflow: TmpCC = AArch64CC::VS; break;
|
if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
|
||||||
case Intrinsic::uadd_with_overflow: TmpCC = AArch64CC::HS; break;
|
isCommutativeIntrinsic(II))
|
||||||
case Intrinsic::usub_with_overflow: TmpCC = AArch64CC::LO; break;
|
std::swap(LHS, RHS);
|
||||||
|
|
||||||
|
// Simplify multiplies.
|
||||||
|
unsigned IID = II->getIntrinsicID();
|
||||||
|
switch (IID) {
|
||||||
|
default:
|
||||||
|
break;
|
||||||
case Intrinsic::smul_with_overflow:
|
case Intrinsic::smul_with_overflow:
|
||||||
case Intrinsic::umul_with_overflow: TmpCC = AArch64CC::NE; break;
|
if (const auto *C = dyn_cast<ConstantInt>(RHS))
|
||||||
|
if (C->getValue() == 2)
|
||||||
|
IID = Intrinsic::sadd_with_overflow;
|
||||||
|
break;
|
||||||
|
case Intrinsic::umul_with_overflow:
|
||||||
|
if (const auto *C = dyn_cast<ConstantInt>(RHS))
|
||||||
|
if (C->getValue() == 2)
|
||||||
|
IID = Intrinsic::uadd_with_overflow;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
AArch64CC::CondCode TmpCC;
|
||||||
|
switch (IID) {
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
case Intrinsic::sadd_with_overflow:
|
||||||
|
case Intrinsic::ssub_with_overflow:
|
||||||
|
TmpCC = AArch64CC::VS;
|
||||||
|
break;
|
||||||
|
case Intrinsic::uadd_with_overflow:
|
||||||
|
TmpCC = AArch64CC::HS;
|
||||||
|
break;
|
||||||
|
case Intrinsic::usub_with_overflow:
|
||||||
|
TmpCC = AArch64CC::LO;
|
||||||
|
break;
|
||||||
|
case Intrinsic::smul_with_overflow:
|
||||||
|
case Intrinsic::umul_with_overflow:
|
||||||
|
TmpCC = AArch64CC::NE;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if both instructions are in the same basic block.
|
// Check if both instructions are in the same basic block.
|
||||||
|
@@ -587,6 +587,23 @@ continue:
|
|||||||
ret i1 true
|
ret i1 true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define zeroext i1 @smulo2.br.i64(i64 %v1) {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: smulo2.br.i64
|
||||||
|
; CHECK: cmn x0, x0
|
||||||
|
; CHECK-NEXT: b.vc
|
||||||
|
%t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 2)
|
||||||
|
%val = extractvalue {i64, i1} %t, 0
|
||||||
|
%obit = extractvalue {i64, i1} %t, 1
|
||||||
|
br i1 %obit, label %overflow, label %continue
|
||||||
|
|
||||||
|
overflow:
|
||||||
|
ret i1 false
|
||||||
|
|
||||||
|
continue:
|
||||||
|
ret i1 true
|
||||||
|
}
|
||||||
|
|
||||||
define zeroext i1 @umulo.br.i32(i32 %v1, i32 %v2) {
|
define zeroext i1 @umulo.br.i32(i32 %v1, i32 %v2) {
|
||||||
entry:
|
entry:
|
||||||
; CHECK-LABEL: umulo.br.i32
|
; CHECK-LABEL: umulo.br.i32
|
||||||
@@ -622,6 +639,23 @@ continue:
|
|||||||
ret i1 true
|
ret i1 true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define zeroext i1 @umulo2.br.i64(i64 %v1) {
|
||||||
|
entry:
|
||||||
|
; CHECK-LABEL: umulo2.br.i64
|
||||||
|
; CHECK: cmn x0, x0
|
||||||
|
; CHECK-NEXT: b.lo
|
||||||
|
%t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 2)
|
||||||
|
%val = extractvalue {i64, i1} %t, 0
|
||||||
|
%obit = extractvalue {i64, i1} %t, 1
|
||||||
|
br i1 %obit, label %overflow, label %continue
|
||||||
|
|
||||||
|
overflow:
|
||||||
|
ret i1 false
|
||||||
|
|
||||||
|
continue:
|
||||||
|
ret i1 true
|
||||||
|
}
|
||||||
|
|
||||||
declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
|
declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
|
||||||
declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
|
declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
|
||||||
declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
|
declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
|
||||||
|
Reference in New Issue
Block a user