All "integer" logical ops (pand, por, pxor) are now promoted to v2i64.

Clean up and fix various logical ops issues.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27633 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2006-04-12 21:21:57 +00:00
parent cc9876124e
commit 2c3ae37213
3 changed files with 74 additions and 149 deletions

View File

@ -794,3 +794,7 @@ http://llvm.org/bugs/show_bug.cgi?id=729
X86RegisterInfo::copyRegToReg() returns X86::MOVAPSrr for VR128. Is it possible
to choose between movaps, movapd, and movdqa based on types of source and
destination?
How about andps, andpd, and pand? Do we really care about the type of the packed
elements? If not, why not always use the "ps" variants, which are likely to be
shorter?

View File

@ -275,6 +275,9 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
if (Subtarget->hasSSE1()) {
addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
setOperationAction(ISD::AND, MVT::v4f32, Legal);
setOperationAction(ISD::OR, MVT::v4f32, Legal);
setOperationAction(ISD::XOR, MVT::v4f32, Legal);
setOperationAction(ISD::ADD, MVT::v4f32, Legal);
setOperationAction(ISD::SUB, MVT::v4f32, Legal);
setOperationAction(ISD::MUL, MVT::v4f32, Legal);
@ -301,36 +304,43 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
setOperationAction(ISD::SUB, MVT::v8i16, Legal);
setOperationAction(ISD::SUB, MVT::v4i32, Legal);
setOperationAction(ISD::MUL, MVT::v2f64, Legal);
setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i16, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i32, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
// Promote v16i8, v8i16, v4i32 selects to v2i64. Custom lower v2i64, v2f64,
// and v4f32 selects.
for (unsigned VT = (unsigned)MVT::v16i8;
VT != (unsigned)MVT::v2i64; VT++) {
setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
// Custom lower build_vector, vector_shuffle, and extract_vector_elt.
for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Custom);
}
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
// Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
setOperationAction(ISD::AND, (MVT::ValueType)VT, Promote);
AddPromotedToType (ISD::AND, (MVT::ValueType)VT, MVT::v2i64);
setOperationAction(ISD::OR, (MVT::ValueType)VT, Promote);
AddPromotedToType (ISD::OR, (MVT::ValueType)VT, MVT::v2i64);
setOperationAction(ISD::XOR, (MVT::ValueType)VT, Promote);
AddPromotedToType (ISD::XOR, (MVT::ValueType)VT, MVT::v2i64);
setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Promote);
AddPromotedToType (ISD::LOAD, (MVT::ValueType)VT, MVT::v2i64);
setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
}
// v2f64 and v2i64 loads are legal. Custom lower v2i64 and v2f64 selects.
setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
setOperationAction(ISD::LOAD, MVT::v2i64, Legal);
setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
}
// We want to custom lower some of our intrinsics.
@ -2827,6 +2837,7 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
return SDOperand();
MVT::ValueType VT = Op.getValueType();
// TODO: handle v16i8.
if (MVT::getSizeInBits(VT) == 16) {
// Transform it so it matches pextrw, which produces a 32-bit result.
MVT::ValueType EVT = (MVT::ValueType)(VT+1);

View File

@ -1019,9 +1019,7 @@ let isTwoAddress = 1 in {
let isCommutable = 1 in {
def ANDPSrr : PSI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
"andps {$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(and (bc_v4i32 (v4f32 VR128:$src1)),
(bc_v4i32 (v4f32 VR128:$src2))))]>;
[(set VR128:$dst, (v2i64 (and VR128:$src1, VR128:$src2)))]>;
def ANDPDrr : PDI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
"andpd {$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
@ -1029,9 +1027,7 @@ def ANDPDrr : PDI<0x54, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
(bc_v2i64 (v2f64 VR128:$src2))))]>;
def ORPSrr : PSI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
"orps {$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(or (bc_v4i32 (v4f32 VR128:$src1)),
(bc_v4i32 (v4f32 VR128:$src2))))]>;
[(set VR128:$dst, (v2i64 (or VR128:$src1, VR128:$src2)))]>;
def ORPDrr : PDI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
"orpd {$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
@ -1039,9 +1035,7 @@ def ORPDrr : PDI<0x56, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
(bc_v2i64 (v2f64 VR128:$src2))))]>;
def XORPSrr : PSI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
"xorps {$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(xor (bc_v4i32 (v4f32 VR128:$src1)),
(bc_v4i32 (v4f32 VR128:$src2))))]>;
[(set VR128:$dst, (v2i64 (xor VR128:$src1, VR128:$src2)))]>;
def XORPDrr : PDI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
"xorpd {$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
@ -1050,9 +1044,8 @@ def XORPDrr : PDI<0x57, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
}
def ANDPSrm : PSI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
"andps {$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(and (bc_v4i32 (v4f32 VR128:$src1)),
(bc_v4i32 (loadv4f32 addr:$src2))))]>;
[(set VR128:$dst, (and VR128:$src1,
(bc_v2i64 (loadv4f32 addr:$src2))))]>;
def ANDPDrm : PDI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
"andpd {$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
@ -1060,9 +1053,8 @@ def ANDPDrm : PDI<0x54, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
(bc_v2i64 (loadv2f64 addr:$src2))))]>;
def ORPSrm : PSI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
"orps {$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(or (bc_v4i32 (v4f32 VR128:$src1)),
(bc_v4i32 (loadv4f32 addr:$src2))))]>;
[(set VR128:$dst, (or VR128:$src1,
(bc_v2i64 (loadv4f32 addr:$src2))))]>;
def ORPDrm : PDI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
"orpd {$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
@ -1070,9 +1062,8 @@ def ORPDrm : PDI<0x56, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
(bc_v2i64 (loadv2f64 addr:$src2))))]>;
def XORPSrm : PSI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
"xorps {$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(xor (bc_v4i32 (v4f32 VR128:$src1)),
(bc_v4i32 (loadv4f32 addr:$src2))))]>;
[(set VR128:$dst, (xor VR128:$src1,
(bc_v2i64 (loadv4f32 addr:$src2))))]>;
def XORPDrm : PDI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
"xorpd {$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
@ -1080,14 +1071,14 @@ def XORPDrm : PDI<0x57, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
(bc_v2i64 (loadv2f64 addr:$src2))))]>;
def ANDNPSrr : PSI<0x55, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
"andnps {$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(and (vnot (bc_v4i32 (v4f32 VR128:$src1))),
(bc_v4i32 (v4f32 VR128:$src2))))]>;
[(set VR128:$dst, (v2i64 (and (xor VR128:$src1,
(bc_v2i64 (v4i32 immAllOnesV))),
VR128:$src2)))]>;
def ANDNPSrm : PSI<0x55, MRMSrcMem, (ops VR128:$dst, VR128:$src1,f128mem:$src2),
"andnps {$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(and (vnot (bc_v4i32 (v4f32 VR128:$src1))),
(bc_v4i32 (loadv4f32 addr:$src2))))]>;
[(set VR128:$dst, (v2i64 (and (xor VR128:$src1,
(bc_v2i64 (v4i32 immAllOnesV))),
(bc_v2i64 (loadv4f32 addr:$src2)))))]>;
def ANDNPDrr : PDI<0x55, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
"andnpd {$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
@ -1922,110 +1913,29 @@ def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
// 128-bit logical shifts
def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
(v2i64 (PSLLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
(v2i64 (PSLLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>,
Requires<[HasSSE2]>;
def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
(v2i64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>;
(v2i64 (PSRLDQri VR128:$src1, (PSxLDQ_imm imm:$src2)))>,
Requires<[HasSSE2]>;
// Logical ops
def : Pat<(and (bc_v4i32 (v4f32 VR128:$src1)), (loadv4i32 addr:$src2)),
(ANDPSrm VR128:$src1, addr:$src2)>;
def : Pat<(and (bc_v2i64 (v2f64 VR128:$src1)), (loadv2i64 addr:$src2)),
(ANDPDrm VR128:$src1, addr:$src2)>;
def : Pat<(or (bc_v4i32 (v4f32 VR128:$src1)), (loadv4i32 addr:$src2)),
(ORPSrm VR128:$src1, addr:$src2)>;
def : Pat<(or (bc_v2i64 (v2f64 VR128:$src1)), (loadv2i64 addr:$src2)),
(ORPDrm VR128:$src1, addr:$src2)>;
def : Pat<(xor (bc_v4i32 (v4f32 VR128:$src1)), (loadv4i32 addr:$src2)),
(XORPSrm VR128:$src1, addr:$src2)>;
def : Pat<(xor (bc_v2i64 (v2f64 VR128:$src1)), (loadv2i64 addr:$src2)),
(XORPDrm VR128:$src1, addr:$src2)>;
def : Pat<(and (vnot (bc_v4i32 (v4f32 VR128:$src1))), (loadv4i32 addr:$src2)),
(ANDNPSrm VR128:$src1, addr:$src2)>;
def : Pat<(and (vnot (bc_v2i64 (v2f64 VR128:$src1))), (loadv2i64 addr:$src2)),
(ANDNPDrm VR128:$src1, addr:$src2)>;
// Some special case pandn patterns.
def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
VR128:$src2)),
(PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
VR128:$src2)),
(PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
VR128:$src2)),
(PANDNrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
def : Pat<(bc_v4f32 (v4i32 (and VR128:$src1, VR128:$src2))),
(ANDPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(bc_v4f32 (v4i32 (or VR128:$src1, VR128:$src2))),
(ORPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(bc_v4f32 (v4i32 (xor VR128:$src1, VR128:$src2))),
(XORPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(bc_v4f32 (v4i32 (and (vnot VR128:$src1), VR128:$src2))),
(ANDNPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(bc_v4f32 (v4i32 (and VR128:$src1, (load addr:$src2)))),
(ANDPSrm (v4i32 VR128:$src1), addr:$src2)>;
def : Pat<(bc_v4f32 (v4i32 (or VR128:$src1, (load addr:$src2)))),
(ORPSrm VR128:$src1, addr:$src2)>;
def : Pat<(bc_v4f32 (v4i32 (xor VR128:$src1, (load addr:$src2)))),
(XORPSrm VR128:$src1, addr:$src2)>;
def : Pat<(bc_v4f32 (v4i32 (and (vnot VR128:$src1), (load addr:$src2)))),
(ANDNPSrm VR128:$src1, addr:$src2)>;
def : Pat<(bc_v2f64 (v2i64 (and VR128:$src1, VR128:$src2))),
(ANDPDrr VR128:$src1, VR128:$src2)>;
def : Pat<(bc_v2f64 (v2i64 (or VR128:$src1, VR128:$src2))),
(ORPDrr VR128:$src1, VR128:$src2)>;
def : Pat<(bc_v2f64 (v2i64 (xor VR128:$src1, VR128:$src2))),
(XORPDrr VR128:$src1, VR128:$src2)>;
def : Pat<(bc_v2f64 (v2i64 (and (vnot VR128:$src1), VR128:$src2))),
(ANDNPDrr VR128:$src1, VR128:$src2)>;
def : Pat<(bc_v2f64 (v2i64 (and VR128:$src1, (load addr:$src2)))),
(ANDPSrm (v2i64 VR128:$src1), addr:$src2)>;
def : Pat<(bc_v2f64 (v2i64 (or VR128:$src1, (load addr:$src2)))),
(ORPSrm VR128:$src1, addr:$src2)>;
def : Pat<(bc_v2f64 (v2i64 (xor VR128:$src1, (load addr:$src2)))),
(XORPSrm VR128:$src1, addr:$src2)>;
def : Pat<(bc_v2f64 (v2i64 (and (vnot VR128:$src1), (load addr:$src2)))),
(ANDNPSrm VR128:$src1, addr:$src2)>;
def : Pat<(v4i32 (and VR128:$src1, VR128:$src2)),
(PANDrr VR128:$src1, VR128:$src2)>;
def : Pat<(v8i16 (and VR128:$src1, VR128:$src2)),
(PANDrr VR128:$src1, VR128:$src2)>;
def : Pat<(v16i8 (and VR128:$src1, VR128:$src2)),
(PANDrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4i32 (or VR128:$src1, VR128:$src2)),
(PORrr VR128:$src1, VR128:$src2)>;
def : Pat<(v8i16 (or VR128:$src1, VR128:$src2)),
(PORrr VR128:$src1, VR128:$src2)>;
def : Pat<(v16i8 (or VR128:$src1, VR128:$src2)),
(PORrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4i32 (xor VR128:$src1, VR128:$src2)),
(PXORrr VR128:$src1, VR128:$src2)>;
def : Pat<(v8i16 (xor VR128:$src1, VR128:$src2)),
(PXORrr VR128:$src1, VR128:$src2)>;
def : Pat<(v16i8 (xor VR128:$src1, VR128:$src2)),
(PXORrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4i32 (and (vnot VR128:$src1), VR128:$src2)),
(PANDNrr VR128:$src1, VR128:$src2)>;
def : Pat<(v8i16 (and (vnot VR128:$src1), VR128:$src2)),
(PANDNrr VR128:$src1, VR128:$src2)>;
def : Pat<(v16i8 (and (vnot VR128:$src1), VR128:$src2)),
(PANDNrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4i32 (and VR128:$src1, (load addr:$src2))),
(PANDrm VR128:$src1, addr:$src2)>;
def : Pat<(v8i16 (and VR128:$src1, (load addr:$src2))),
(PANDrm VR128:$src1, addr:$src2)>;
def : Pat<(v16i8 (and VR128:$src1, (load addr:$src2))),
(PANDrm VR128:$src1, addr:$src2)>;
def : Pat<(v4i32 (or VR128:$src1, (load addr:$src2))),
(PORrm VR128:$src1, addr:$src2)>;
def : Pat<(v8i16 (or VR128:$src1, (load addr:$src2))),
(PORrm VR128:$src1, addr:$src2)>;
def : Pat<(v16i8 (or VR128:$src1, (load addr:$src2))),
(PORrm VR128:$src1, addr:$src2)>;
def : Pat<(v4i32 (xor VR128:$src1, (load addr:$src2))),
(PXORrm VR128:$src1, addr:$src2)>;
def : Pat<(v8i16 (xor VR128:$src1, (load addr:$src2))),
(PXORrm VR128:$src1, addr:$src2)>;
def : Pat<(v16i8 (xor VR128:$src1, (load addr:$src2))),
(PXORrm VR128:$src1, addr:$src2)>;
def : Pat<(v4i32 (and (vnot VR128:$src1), (load addr:$src2))),
(PANDNrm VR128:$src1, addr:$src2)>;
def : Pat<(v8i16 (and (vnot VR128:$src1), (load addr:$src2))),
(PANDNrm VR128:$src1, addr:$src2)>;
def : Pat<(v16i8 (and (vnot VR128:$src1), (load addr:$src2))),
(PANDNrm VR128:$src1, addr:$src2)>;
def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v4i32 immAllOnesV))),
(load addr:$src2))),
(PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v8i16 immAllOnesV))),
(load addr:$src2))),
(PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
(load addr:$src2))),
(PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;