[FastISel][AArch64] Extend 'select' lowering to also support the i1, i8, and i16 types.

Related to rdar://problem/18960150. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@221846 91177308-0d34-0410-b5e6-96231b3b80d8
2025-04-15 13:40:33 +00:00 · 2014-11-13 00:36:38 +00:00 · 2014-11-13 00:36:38 +00:00 · 8d6824ea4c
commit 8d6824ea4c
parent 4848765635
3 changed files with 105 additions and 97 deletions
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@ -2497,59 +2497,71 @@ bool AArch64FastISel::selectCmp(const Instruction *I) {
 }

 bool AArch64FastISel::selectSelect(const Instruction *I) {
-  const SelectInst *SI = cast<SelectInst>(I);
-
-  EVT DestEVT = TLI.getValueType(SI->getType(), true);
-  if (!DestEVT.isSimple())
+  assert(isa<SelectInst>(I) && "Expected a select instruction.");
+  MVT VT;
+  if (!isTypeSupported(I->getType(), VT))
    return false;

-  MVT DestVT = DestEVT.getSimpleVT();
-  if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 &&
-      DestVT != MVT::f64)
+  unsigned Opc;
+  const TargetRegisterClass *RC;
+  switch (VT.SimpleTy) {
+  default:
    return false;
-
-  unsigned SelectOpc;
-  const TargetRegisterClass *RC = nullptr;
-  switch (DestVT.SimpleTy) {
-  default: return false;
+  case MVT::i1:
+  case MVT::i8:
+  case MVT::i16:
  case MVT::i32:
-    SelectOpc = AArch64::CSELWr;    RC = &AArch64::GPR32RegClass; break;
+    Opc = AArch64::CSELWr;
+    RC = &AArch64::GPR32RegClass;
+    break;
  case MVT::i64:
-    SelectOpc = AArch64::CSELXr;    RC = &AArch64::GPR64RegClass; break;
+    Opc = AArch64::CSELXr;
+    RC = &AArch64::GPR64RegClass;
+    break;
  case MVT::f32:
-    SelectOpc = AArch64::FCSELSrrr; RC = &AArch64::FPR32RegClass; break;
+    Opc = AArch64::FCSELSrrr;
+    RC = &AArch64::FPR32RegClass;
+    break;
  case MVT::f64:
-    SelectOpc = AArch64::FCSELDrrr; RC = &AArch64::FPR64RegClass; break;
+    Opc = AArch64::FCSELDrrr;
+    RC = &AArch64::FPR64RegClass;
+    break;
  }

+  const SelectInst *SI = cast<SelectInst>(I);
  const Value *Cond = SI->getCondition();
-  bool NeedTest = true;
  AArch64CC::CondCode CC = AArch64CC::NE;
-  if (foldXALUIntrinsic(CC, I, Cond))
-    NeedTest = false;

-  unsigned CondReg = getRegForValue(Cond);
-  if (!CondReg)
-    return false;
-  bool CondIsKill = hasTrivialKill(Cond);
+  // Try to pick up the flags, so we don't have to emit another compare.
+  if (foldXALUIntrinsic(CC, I, Cond)) {
+    // Fake request the condition to force emission of the XALU intrinsic.
+    unsigned CondReg = getRegForValue(Cond);
+    if (!CondReg)
+      return false;
+  } else {
+    unsigned CondReg = getRegForValue(Cond);
+    if (!CondReg)
+      return false;
+    bool CondIsKill = hasTrivialKill(Cond);

-  if (NeedTest) {
-    unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
-    assert(ANDReg && "Unexpected AND instruction emission failure.");
-    emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
+    // Emit a TST instruction (ANDS wzr, reg, #imm).
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDSWri),
+            AArch64::WZR)
+        .addReg(CondReg, getKillRegState(CondIsKill))
+        .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
  }

-  unsigned TrueReg = getRegForValue(SI->getTrueValue());
-  bool TrueIsKill = hasTrivialKill(SI->getTrueValue());
+  unsigned Src1Reg = getRegForValue(SI->getTrueValue());
+  bool Src1IsKill = hasTrivialKill(SI->getTrueValue());

-  unsigned FalseReg = getRegForValue(SI->getFalseValue());
-  bool FalseIsKill = hasTrivialKill(SI->getFalseValue());
+  unsigned Src2Reg = getRegForValue(SI->getFalseValue());
+  bool Src2IsKill = hasTrivialKill(SI->getFalseValue());

-  if (!TrueReg || !FalseReg)
+  if (!Src1Reg || !Src2Reg)
    return false;

-  unsigned ResultReg = fastEmitInst_rri(SelectOpc, RC, TrueReg, TrueIsKill,
-                                        FalseReg, FalseIsKill, CC);
+  unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
+                                        Src2IsKill, CC);
  updateValueMap(I, ResultReg);
  return true;
 }
--- a/test/CodeGen/AArch64/arm64-fast-isel-select.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-select.ll
@ -1,63 +0,0 @@
-; RUN: llc -O0 -fast-isel-abort -mtriple=arm64-apple-darwin -verify-machineinstrs < %s | FileCheck %s
-
-define i32 @t1(i32 %c) nounwind readnone {
-entry:
-; CHECK: @t1
-; CHECK: and w0, w0, #0x1
-; CHECK: cmp w0, #0
-; CHECK: csel w0, w{{[0-9]+}}, w{{[0-9]+}}, ne
-  %0 = icmp sgt i32 %c, 1
-  %1 = select i1 %0, i32 123, i32 357
-  ret i32 %1
-}
-
-define i64 @t2(i32 %c) nounwind readnone {
-entry:
-; CHECK: @t2
-; CHECK: and w0, w0, #0x1
-; CHECK: cmp w0, #0
-; CHECK: csel x0, x{{[0-9]+}}, x{{[0-9]+}}, ne
-  %0 = icmp sgt i32 %c, 1
-  %1 = select i1 %0, i64 123, i64 357
-  ret i64 %1
-}
-
-define i32 @t3(i1 %c, i32 %a, i32 %b) nounwind readnone {
-entry:
-; CHECK: @t3
-; CHECK: and w0, w0, #0x1
-; CHECK: cmp w0, #0
-; CHECK: csel w0, w{{[0-9]+}}, w{{[0-9]+}}, ne
-  %0 = select i1 %c, i32 %a, i32 %b
-  ret i32 %0
-}
-
-define i64 @t4(i1 %c, i64 %a, i64 %b) nounwind readnone {
-entry:
-; CHECK: @t4
-; CHECK: and w0, w0, #0x1
-; CHECK: cmp w0, #0
-; CHECK: csel x0, x{{[0-9]+}}, x{{[0-9]+}}, ne
-  %0 = select i1 %c, i64 %a, i64 %b
-  ret i64 %0
-}
-
-define float @t5(i1 %c, float %a, float %b) nounwind readnone {
-entry:
-; CHECK: @t5
-; CHECK: and w0, w0, #0x1
-; CHECK: cmp w0, #0
-; CHECK: fcsel s0, s0, s1, ne
-  %0 = select i1 %c, float %a, float %b
-  ret float %0
-}
-
-define double @t6(i1 %c, double %a, double %b) nounwind readnone {
-entry:
-; CHECK: @t6
-; CHECK: and w0, w0, #0x1
-; CHECK: cmp w0, #0
-; CHECK: fcsel d0, d0, d1, ne
-  %0 = select i1 %c, double %a, double %b
-  ret double %0
-}
--- a/test/CodeGen/AArch64/fast-isel-select.ll
+++ b/test/CodeGen/AArch64/fast-isel-select.ll
@ -0,0 +1,59 @@
+; RUN: llc -mtriple=aarch64-apple-darwin                             -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+
+; First test the different supported value types for select.
+define zeroext i1 @select_i1(i1 zeroext %c, i1 zeroext %a, i1 zeroext %b) {
+; CHECK-LABEL: select_i1
+; CHECK:       {{cmp w0, #0|tst w0, #0x1}}
+; CHECK-NEXT:  csel {{w[0-9]+}}, w1, w2, ne
+  %1 = select i1 %c, i1 %a, i1 %b
+  ret i1 %1
+}
+
+define zeroext i8 @select_i8(i1 zeroext %c, i8 zeroext %a, i8 zeroext %b) {
+; CHECK-LABEL: select_i8
+; CHECK:       {{cmp w0, #0|tst w0, #0x1}}
+; CHECK-NEXT:  csel {{w[0-9]+}}, w1, w2, ne
+  %1 = select i1 %c, i8 %a, i8 %b
+  ret i8 %1
+}
+
+define zeroext i16 @select_i16(i1 zeroext %c, i16 zeroext %a, i16 zeroext %b) {
+; CHECK-LABEL: select_i16
+; CHECK:       {{cmp w0, #0|tst w0, #0x1}}
+; CHECK-NEXT:  csel {{w[0-9]+}}, w1, w2, ne
+  %1 = select i1 %c, i16 %a, i16 %b
+  ret i16 %1
+}
+
+define i32 @select_i32(i1 zeroext %c, i32 %a, i32 %b) {
+; CHECK-LABEL: select_i32
+; CHECK:       {{cmp w0, #0|tst w0, #0x1}}
+; CHECK-NEXT:  csel {{w[0-9]+}}, w1, w2, ne
+  %1 = select i1 %c, i32 %a, i32 %b
+  ret i32 %1
+}
+
+define i64 @select_i64(i1 zeroext %c, i64 %a, i64 %b) {
+; CHECK-LABEL: select_i64
+; CHECK:       {{cmp w0, #0|tst w0, #0x1}}
+; CHECK-NEXT:  csel {{x[0-9]+}}, x1, x2, ne
+  %1 = select i1 %c, i64 %a, i64 %b
+  ret i64 %1
+}
+
+define float @select_f32(i1 zeroext %c, float %a, float %b) {
+; CHECK-LABEL: select_f32
+; CHECK:       {{cmp w0, #0|tst w0, #0x1}}
+; CHECK-NEXT:  fcsel {{s[0-9]+}}, s0, s1, ne
+  %1 = select i1 %c, float %a, float %b
+  ret float %1
+}
+
+define double @select_f64(i1 zeroext %c, double %a, double %b) {
+; CHECK-LABEL: select_f64
+; CHECK:       {{cmp w0, #0|tst w0, #0x1}}
+; CHECK-NEXT:  fcsel {{d[0-9]+}}, d0, d1, ne
+  %1 = select i1 %c, double %a, double %b
+  ret double %1
+}