From 815af82b74fa0901e818f5d16ee418675f399101 Mon Sep 17 00:00:00 2001
From: Nadav Rotem <nadav.rotem@intel.com>
Date: Wed, 19 Oct 2011 20:43:16 +0000
Subject: [PATCH] Improve code generation for vselect on SSE2: When checking
 the availability of instructions using the TLI, a 'promoted' instruction IS
 available. It means that the value is bitcasted to another type for which
 there is an operation. The correct check for the availability of an
 instruction is to check if it should be expanded.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142542 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 16 +++++++++-------
 test/CodeGen/X86/sse2-blend.ll                 | 17 +++++++++++------
 2 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 7fe35306bd2..4e02b90aad3 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -394,10 +394,12 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
 
   // If we can't even use the basic vector operations of
   // AND,OR,XOR, we will have to scalarize the op.
-  if (!TLI.isOperationLegalOrCustom(ISD::AND, VT) ||
-      !TLI.isOperationLegalOrCustom(ISD::XOR, VT) ||
-      !TLI.isOperationLegalOrCustom(ISD::OR, VT))
-        return DAG.UnrollVectorOp(Op.getNode());
+  // Notice that the operation may be 'promoted' which means that it is
+  // 'bitcasted' to another type which is handled.
+  if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
+      TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
+      TLI.getOperationAction(ISD::OR,  VT) == TargetLowering::Expand)
+    return DAG.UnrollVectorOp(Op.getNode());
 
   assert(VT.getSizeInBits() == Op.getOperand(1).getValueType().getSizeInBits()
          && "Invalid mask size");
@@ -421,9 +423,9 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
   DebugLoc DL = Op.getDebugLoc();
 
   // Make sure that the SINT_TO_FP and SRL instructions are available.
-  if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, VT) ||
-      !TLI.isOperationLegalOrCustom(ISD::SRL, VT))
-      return DAG.UnrollVectorOp(Op.getNode());
+  if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand ||
+      TLI.getOperationAction(ISD::SRL,        VT) == TargetLowering::Expand)
+    return DAG.UnrollVectorOp(Op.getNode());
 
  EVT SVT = VT.getScalarType();
   assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) &&
diff --git a/test/CodeGen/X86/sse2-blend.ll b/test/CodeGen/X86/sse2-blend.ll
index 56b099ec42e..0007cab0961 100644
--- a/test/CodeGen/X86/sse2-blend.ll
+++ b/test/CodeGen/X86/sse2-blend.ll
@@ -1,8 +1,10 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah -promote-elements -mattr=+sse2,-sse41 | FileCheck %s
-
-
-; currently (xor v4i32) is defined as illegal, so we scalarize the code.
+; RUN: llc < %s -march=x86 -mcpu=yonah -mattr=+sse2,-sse41 | FileCheck %s
 
+; CHECK: vsel_float
+; CHECK: pandn
+; CHECK: pand
+; CHECK: por
+; CHECK: ret
 define void@vsel_float(<4 x float>* %v1, <4 x float>* %v2) {
   %A = load <4 x float>* %v1
   %B = load <4 x float>* %v2
@@ -11,8 +13,11 @@ define void@vsel_float(<4 x float>* %v1, <4 x float>* %v2) {
   ret void
 }
 
-; currently (xor v4i32) is defined as illegal, so we scalarize the code.
-
+; CHECK: vsel_i32
+; CHECK: pandn
+; CHECK: pand
+; CHECK: por
+; CHECK: ret
 define void@vsel_i32(<4 x i32>* %v1, <4 x i32>* %v2) {
   %A = load <4 x i32>* %v1
   %B = load <4 x i32>* %v2