diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 4cd0a476dd9..eab96a4f47e 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -87,10 +87,6 @@ PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
   setOperationAction(ISD::SELECT, MVT::i32, Expand);
   setOperationAction(ISD::SELECT, MVT::f32, Expand);
   setOperationAction(ISD::SELECT, MVT::f64, Expand);
-  setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
-  setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
-  setOperationAction(ISD::SELECT, MVT::v8i16, Expand);
-  setOperationAction(ISD::SELECT, MVT::v16i8, Expand);

   // PowerPC wants to turn select_cc of FP into fsel when possible.
   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
@@ -178,17 +174,29 @@ PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
     // will selectively turn on ones that can be effectively codegen'd.
     for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
          VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
-      // add/sub/and/or/xor are legal for all supported vector VT's.
+      // add/sub are legal for all supported vector VT's.
       setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
       setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
-      setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal);
-      setOperationAction(ISD::OR  , (MVT::ValueType)VT, Legal);
-      setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal);

       // We promote all shuffles to v16i8.
       setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Promote);
-      AddPromotedToType(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, MVT::v16i8);
+      AddPromotedToType (ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, MVT::v16i8);
+
+      // We promote all non-typed operations to v4i32.
+      setOperationAction(ISD::AND   , (MVT::ValueType)VT, Promote);
+      AddPromotedToType (ISD::AND   , (MVT::ValueType)VT, MVT::v4i32);
+      setOperationAction(ISD::OR    , (MVT::ValueType)VT, Promote);
+      AddPromotedToType (ISD::OR    , (MVT::ValueType)VT, MVT::v4i32);
+      setOperationAction(ISD::XOR   , (MVT::ValueType)VT, Promote);
+      AddPromotedToType (ISD::XOR   , (MVT::ValueType)VT, MVT::v4i32);
+      setOperationAction(ISD::LOAD  , (MVT::ValueType)VT, Promote);
+      AddPromotedToType (ISD::LOAD  , (MVT::ValueType)VT, MVT::v4i32);
+      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
+      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v4i32);
+      setOperationAction(ISD::STORE , (MVT::ValueType)VT, Promote);
+      AddPromotedToType (ISD::STORE , (MVT::ValueType)VT, MVT::v4i32);

+      // No other operations are legal.
       setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
       setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
       setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
@@ -205,6 +213,13 @@ PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
     // with merges, splats, etc.
     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
+    setOperationAction(ISD::AND   , MVT::v4i32, Legal);
+    setOperationAction(ISD::OR    , MVT::v4i32, Legal);
+    setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
+    setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
+    setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
+    setOperationAction(ISD::STORE , MVT::v4i32, Legal);
+
     addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
     addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
     addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
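The promotion set up above is table-driven: setOperationAction records an action per
(opcode, type) pair, AddPromotedToType records the canonical type for Promote entries,
and legalize rewrites any promoted operation by bitcasting to that type and back. A
minimal self-contained sketch of that bookkeeping follows; the file name, ISD_AND, and
the two maps are inventions of this note, not LLVM's real data structures.

// promote_sketch.cpp -- standalone model of operation promotion.
#include <cassert>
#include <cstdio>
#include <map>
#include <utility>

enum Action { Legal, Promote, Expand };
enum ValueType { v16i8, v8i16, v4i32, v4f32 };
enum Opcode { ISD_AND };

static std::map<std::pair<Opcode, ValueType>, Action> OpActions;
static std::map<std::pair<Opcode, ValueType>, ValueType> PromotedTo;

static void setOperationAction(Opcode Op, ValueType VT, Action A) {
  OpActions[{Op, VT}] = A;
}
static void addPromotedToType(Opcode Op, ValueType VT, ValueType To) {
  PromotedTo[{Op, VT}] = To;
}

int main() {
  // Mirror the constructor loop above: AND is Promote for every vector
  // type, with v4i32 recorded as the canonical type...
  for (ValueType VT : {v16i8, v8i16, v4i32, v4f32}) {
    setOperationAction(ISD_AND, VT, Promote);
    addPromotedToType(ISD_AND, VT, v4i32);
  }
  // ...and v4i32 itself is then marked Legal (as in the second hunk),
  // so the rewrite terminates after one step.
  setOperationAction(ISD_AND, v4i32, Legal);

  // "Legalize" consults the table: a v16i8 AND becomes a bitcast to
  // v4i32, a v4i32 AND, and a bitcast back.
  ValueType VT = v16i8;
  if (OpActions[{ISD_AND, VT}] == Promote) {
    ValueType To = PromotedTo[{ISD_AND, VT}];
    assert(OpActions[{ISD_AND, To}] == Legal);
    std::printf("v16i8 AND -> bitcast, v4i32 AND, bitcast\n");
  }
  return 0;
}

Because every vector type funnels into one canonical type this way, the instruction
selector below only needs patterns at v4i32, and identical operations at different
types become CSE-able after legalization.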
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index 971a715a880..2cb4cdac22d 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -158,7 +158,7 @@ class VX2_Int<bits<11> xo, string opc, Intrinsic IntID>
 // Instruction Definitions.

 def IMPLICIT_DEF_VRRC : Pseudo<(ops VRRC:$rD), "; $rD = IMPLICIT_DEF_VRRC",
-                               [(set VRRC:$rD, (v4f32 (undef)))]>;
+                               [(set VRRC:$rD, (v4i32 (undef)))]>;

 let noResults = 1 in {
 def DSS   : DSS_Form<822, (ops u5imm:$A, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2),
@@ -541,25 +541,16 @@ def : Pat<(int_ppc_altivec_dststt GPRC:$rA, GPRC:$rB, imm:$STRM),
           (DSTST 1, imm:$STRM, GPRC:$rA, GPRC:$rB)>;

 // Undef.
-def : Pat<(v16i8 (undef)), (v16i8 (IMPLICIT_DEF_VRRC))>;
-def : Pat<(v8i16 (undef)), (v8i16 (IMPLICIT_DEF_VRRC))>;
-def : Pat<(v4i32 (undef)), (v4i32 (IMPLICIT_DEF_VRRC))>;
+def : Pat<(v16i8 (undef)), (IMPLICIT_DEF_VRRC)>;
+def : Pat<(v8i16 (undef)), (IMPLICIT_DEF_VRRC)>;
+def : Pat<(v4f32 (undef)), (IMPLICIT_DEF_VRRC)>;

 // Loads.
-def : Pat<(v16i8 (load xoaddr:$src)), (v16i8 (LVX xoaddr:$src))>;
-def : Pat<(v8i16 (load xoaddr:$src)), (v8i16 (LVX xoaddr:$src))>;
 def : Pat<(v4i32 (load xoaddr:$src)), (v4i32 (LVX xoaddr:$src))>;
-def : Pat<(v4f32 (load xoaddr:$src)), (v4f32 (LVX xoaddr:$src))>;

 // Stores.
-def : Pat<(store (v16i8 VRRC:$rS), xoaddr:$dst),
-          (STVX (v16i8 VRRC:$rS), xoaddr:$dst)>;
-def : Pat<(store (v8i16 VRRC:$rS), xoaddr:$dst),
-          (STVX (v8i16 VRRC:$rS), xoaddr:$dst)>;
 def : Pat<(store (v4i32 VRRC:$rS), xoaddr:$dst),
           (STVX (v4i32 VRRC:$rS), xoaddr:$dst)>;
-def : Pat<(store (v4f32 VRRC:$rS), xoaddr:$dst),
-          (STVX (v4f32 VRRC:$rS), xoaddr:$dst)>;

 // Bit conversions.
 def : Pat<(v16i8 (bitconvert (v8i16 VRRC:$src))), (v16i8 VRRC:$src)>;
@@ -603,37 +594,11 @@ def:Pat<(vector_shuffle (v16i8 VRRC:$vA), undef,
                         VMRGHW_unary_shuffle_mask:$in),
           (VMRGHW VRRC:$vA, VRRC:$vA)>;

 // Logical Operations
-def : Pat<(v16i8 (vnot VRRC:$vA)), (v16i8 (VNOR VRRC:$vA, VRRC:$vA))>;
-def : Pat<(v8i16 (vnot VRRC:$vA)), (v8i16 (VNOR VRRC:$vA, VRRC:$vA))>;
 def : Pat<(v4i32 (vnot VRRC:$vA)), (v4i32 (VNOR VRRC:$vA, VRRC:$vA))>;
-
-def : Pat<(v16i8 (vnot_conv VRRC:$vA)), (v16i8 (VNOR VRRC:$vA, VRRC:$vA))>;
-def : Pat<(v8i16 (vnot_conv VRRC:$vA)), (v8i16 (VNOR VRRC:$vA, VRRC:$vA))>;
 def : Pat<(v4i32 (vnot_conv VRRC:$vA)), (v4i32 (VNOR VRRC:$vA, VRRC:$vA))>;
-
-def : Pat<(v16i8 (and VRRC:$A, VRRC:$B)), (v16i8 (VAND VRRC:$A, VRRC:$B))>;
-def : Pat<(v8i16 (and VRRC:$A, VRRC:$B)), (v8i16 (VAND VRRC:$A, VRRC:$B))>;
-def : Pat<(v16i8 (or VRRC:$A, VRRC:$B)), (v16i8 (VOR VRRC:$A, VRRC:$B))>;
-def : Pat<(v8i16 (or VRRC:$A, VRRC:$B)), (v8i16 (VOR VRRC:$A, VRRC:$B))>;
-
-def : Pat<(v16i8 (xor VRRC:$A, VRRC:$B)), (v16i8 (VXOR VRRC:$A, VRRC:$B))>;
-def : Pat<(v8i16 (xor VRRC:$A, VRRC:$B)), (v8i16 (VXOR VRRC:$A, VRRC:$B))>;
-def : Pat<(v16i8 (vnot (or VRRC:$A, VRRC:$B))),(v16i8 (VNOR VRRC:$A, VRRC:$B))>;
-def : Pat<(v8i16 (vnot (or VRRC:$A, VRRC:$B))),(v8i16 (VNOR VRRC:$A, VRRC:$B))>;
-def : Pat<(v16i8 (and VRRC:$A, (vnot VRRC:$B))),
-          (v16i8 (VANDC VRRC:$A, VRRC:$B))>;
-def : Pat<(v8i16 (and VRRC:$A, (vnot VRRC:$B))),
-          (v8i16 (VANDC VRRC:$A, VRRC:$B))>;
-
-
-def : Pat<(v16i8 (vnot_conv (or VRRC:$A, VRRC:$B))),(v16i8 (VNOR VRRC:$A, VRRC:$B))>;
-def : Pat<(v8i16 (vnot_conv (or VRRC:$A, VRRC:$B))),(v8i16 (VNOR VRRC:$A, VRRC:$B))>;
-def : Pat<(v4i32 (vnot_conv (or VRRC:$A, VRRC:$B))),(v4i32 (VNOR VRRC:$A, VRRC:$B))>;
-def : Pat<(v16i8 (and VRRC:$A, (vnot_conv VRRC:$B))),
-          (v16i8 (VANDC VRRC:$A, VRRC:$B))>;
-def : Pat<(v8i16 (and VRRC:$A, (vnot_conv VRRC:$B))),
-          (v8i16 (VANDC VRRC:$A, VRRC:$B))>;
+def : Pat<(v4i32 (vnot_conv (or VRRC:$A, VRRC:$B))),
+          (v4i32 (VNOR VRRC:$A, VRRC:$B))>;
 def : Pat<(v4i32 (and VRRC:$A, (vnot_conv VRRC:$B))),
           (v4i32 (VANDC VRRC:$A, VRRC:$B))>;
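Collapsing the logical-operation patterns to a single v4i32 form, as the hunk above
does, is sound because AND/OR/XOR/NOR act bit by bit: reinterpreting the same 128 bits
as sixteen bytes or four words cannot change the result. A self-contained C++ check of
that equivalence for nor; memcpy stands in for the bitcasts that promotion inserts.

// nor_equivalence.cpp -- one v4i32 VNOR pattern covers the v16i8 case too.
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint8_t A[16], B[16], Nor8[16];
  for (int i = 0; i != 16; ++i) {
    A[i] = uint8_t(3 * i + 1);
    B[i] = uint8_t(7 * i);
  }

  // "v16i8" nor: ~(A | B) one byte at a time.
  for (int i = 0; i != 16; ++i)
    Nor8[i] = uint8_t(~(A[i] | B[i]));

  // "v4i32" nor on the same 128 bits, after a bitcast.
  uint32_t A32[4], B32[4], Nor32[4];
  std::memcpy(A32, A, 16);
  std::memcpy(B32, B, 16);
  for (int i = 0; i != 4; ++i)
    Nor32[i] = ~(A32[i] | B32[i]);

  // Identical bit patterns either way, on any endianness.
  assert(std::memcmp(Nor8, Nor32, 16) == 0);
  return 0;
}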
diff --git a/lib/Target/PowerPC/README_ALTIVEC.txt b/lib/Target/PowerPC/README_ALTIVEC.txt
index 3c928ad6bbd..f5a7c173691 100644
--- a/lib/Target/PowerPC/README_ALTIVEC.txt
+++ b/lib/Target/PowerPC/README_ALTIVEC.txt
@@ -65,7 +65,7 @@ clobbered regs.

 //===----------------------------------------------------------------------===//

-Implement passing/returning vectors by value.
+Implement passing vectors by value.

 //===----------------------------------------------------------------------===//

@@ -75,7 +75,7 @@ of C1/C2/C3, then a load and vperm of Variable.
 //===----------------------------------------------------------------------===//

 We currently codegen SCALAR_TO_VECTOR as a store of the scalar to a 16-byte
-aligned stack slot, followed by a lve*x/vperm. We should probably just store it
+aligned stack slot, followed by a load/vperm. We should probably just store it
 to a scalar stack slot, then use lvsl/vperm to load it. If the value is already
 in memory, this is a huge win.

@@ -92,22 +92,6 @@ be constants. The verifier should enforce this constraint.

 //===----------------------------------------------------------------------===//

-Instead of writting a pattern for type-agnostic operations (e.g. gen-zero, load,
-store, and, ...) in every supported type, make legalize do the work. We should
-have a canonical type that we want operations changed to (e.g. v4i32 for
-build_vector) and legalize should change non-identical types to thse. This is
-similar to what it does for operations that are only supported in some types,
-e.g. x86 cmov (not supported on bytes).
-
-This would fix two problems:
-1. Writing patterns multiple times.
-2. Identical operations in different types are not getting CSE'd.
-
-We already do this for shuffle and build_vector. We need load,undef,and,or,xor,
-etc.
-
-//===----------------------------------------------------------------------===//
-
 Implement multiply for vector integer types, to avoid the horrible scalarized
 code produced by legalize.
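The surviving README entry refers to what legalize does when an operation stays
Expand with no vector pattern at all: a v4i32 multiply is split into per-lane
extract/multiply/insert steps. A rough standalone model of that scalarized expansion
(illustrative only; the real expansion operates on SelectionDAG nodes):

// scalarized_mul.cpp -- what Expand means for a v4i32 multiply today.
#include <cstdint>
#include <cstdio>

struct V4I32 { uint32_t Elt[4]; };

// One extract, one scalar multiply in a GPR, and one insert per lane --
// the sequence this README entry wants replaced with real vector code.
static V4I32 ScalarizedMul(const V4I32 &A, const V4I32 &B) {
  V4I32 R;
  for (int i = 0; i != 4; ++i) {
    uint32_t LHS = A.Elt[i]; // extract_vector_elt
    uint32_t RHS = B.Elt[i]; // extract_vector_elt
    R.Elt[i] = LHS * RHS;    // scalar mul, then insert_vector_elt
  }
  return R;
}

int main() {
  V4I32 A = {{1, 2, 3, 4}}, B = {{10, 20, 30, 40}};
  V4I32 R = ScalarizedMul(A, B);
  for (int i = 0; i != 4; ++i)
    std::printf("%u ", R.Elt[i]); // prints: 10 40 90 160
  std::printf("\n");
  return 0;
}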