diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index e64f1bdacd8..88038ff0844 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -2833,15 +2833,49 @@ bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
   if (RetVT != MVT::i32 && RetVT != MVT::i64)
     return false;
 
+  const Value *LHS = II->getArgOperand(0);
+  const Value *RHS = II->getArgOperand(1);
+
+  // Canonicalize immediate to the RHS; the checks below only inspect RHS.
+  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
+      isCommutativeIntrinsic(II))
+    std::swap(LHS, RHS);
+
+  // Simplify multiplies: X * 2 overflows iff X + X does, so reuse the add's CC.
+  unsigned IID = II->getIntrinsicID();
+  switch (IID) {
+  default:
+    break;
+  case Intrinsic::smul_with_overflow:
+    if (const auto *C = dyn_cast<ConstantInt>(RHS))
+      if (C->getValue() == 2)
+        IID = Intrinsic::sadd_with_overflow;
+    break;
+  case Intrinsic::umul_with_overflow:
+    if (const auto *C = dyn_cast<ConstantInt>(RHS))
+      if (C->getValue() == 2)
+        IID = Intrinsic::uadd_with_overflow;
+    break;
+  }
+
   AArch64CC::CondCode TmpCC;
-  switch (II->getIntrinsicID()) {
-  default: return false;
-  case Intrinsic::sadd_with_overflow:
-  case Intrinsic::ssub_with_overflow: TmpCC = AArch64CC::VS; break;
-  case Intrinsic::uadd_with_overflow: TmpCC = AArch64CC::HS; break;
-  case Intrinsic::usub_with_overflow: TmpCC = AArch64CC::LO; break;
-  case Intrinsic::smul_with_overflow:
-  case Intrinsic::umul_with_overflow: TmpCC = AArch64CC::NE; break;
+  switch (IID) {
+  default:
+    return false;
+  case Intrinsic::sadd_with_overflow:
+  case Intrinsic::ssub_with_overflow:
+    TmpCC = AArch64CC::VS;
+    break;
+  case Intrinsic::uadd_with_overflow:
+    TmpCC = AArch64CC::HS;
+    break;
+  case Intrinsic::usub_with_overflow:
+    TmpCC = AArch64CC::LO;
+    break;
+  case Intrinsic::smul_with_overflow:
+  case Intrinsic::umul_with_overflow:
+    TmpCC = AArch64CC::NE;
+    break;
   }
 
   // Check if both instructions are in the same basic block.
diff --git a/test/CodeGen/AArch64/arm64-xaluo.ll b/test/CodeGen/AArch64/arm64-xaluo.ll
index 687a2f735b2..59ce6848afb 100644
--- a/test/CodeGen/AArch64/arm64-xaluo.ll
+++ b/test/CodeGen/AArch64/arm64-xaluo.ll
@@ -587,6 +587,23 @@ continue:
   ret i1 true
 }
 
+define zeroext i1 @smulo2.br.i64(i64 %v1) { ; branches on the overflow bit of a signed multiply of %v1 by 2
+entry:
+; CHECK-LABEL: smulo2.br.i64
+; CHECK: cmn x0, x0
+; CHECK-NEXT: b.vc
+  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 2) ; the multiplier is the constant 2
+  %val = extractvalue {i64, i1} %t, 0 ; arithmetic result; not used again in this function
+  %obit = extractvalue {i64, i1} %t, 1 ; overflow flag; feeds the branch below directly
+  br i1 %obit, label %overflow, label %continue
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
 define zeroext i1 @umulo.br.i32(i32 %v1, i32 %v2) {
 entry:
 ; CHECK-LABEL: umulo.br.i32
@@ -622,6 +639,23 @@ continue:
   ret i1 true
 }
 
+define zeroext i1 @umulo2.br.i64(i64 %v1) { ; branches on the overflow bit of an unsigned multiply of %v1 by 2
+entry:
+; CHECK-LABEL: umulo2.br.i64
+; CHECK: cmn x0, x0
+; CHECK-NEXT: b.lo
+  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 2) ; the multiplier is the constant 2
+  %val = extractvalue {i64, i1} %t, 0 ; arithmetic result; not used again in this function
+  %obit = extractvalue {i64, i1} %t, 1 ; overflow flag; feeds the branch below directly
+  br i1 %obit, label %overflow, label %continue
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
 declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
 declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
 declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone