From 4fcb922c70b67bbd74ff77a9b831bd3699839b6d Mon Sep 17 00:00:00 2001
From: Evan Cheng
Date: Tue, 28 Mar 2006 02:43:26 +0000
Subject: [PATCH] - Clean up / consolidate various shuffle masks.
 - Some misc. bug fixes.
 - Use MOVHPDrm to load from m64 to the upper half of an XMM register.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27210 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp |  60 +++++-------
 lib/Target/X86/X86ISelLowering.h   |  13 +--
 lib/Target/X86/X86InstrSSE.td      | 149 +++++++++++++++++++----------
 3 files changed, 125 insertions(+), 97 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 41316dd9a1b..bd0a4a8eb00 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1451,24 +1451,6 @@ bool X86::isSHUFPMask(SDNode *N) {
   return true;
 }
 
-/// isMOVLHPSorUNPCKLPDMask - Return true if the specified VECTOR_SHUFFLE
-/// operand specifies a shuffle of elements that is suitable for input to
-/// MOVLHPS or UNPCKLPD.
-bool X86::isMOVLHPSorUNPCKLPDMask(SDNode *N) {
-  assert(N->getOpcode() == ISD::BUILD_VECTOR);
-
-  if (N->getNumOperands() != 2)
-    return false;
-
-  // Expect bit 0 == 0, bit1 == 2
-  SDOperand Bit0 = N->getOperand(0);
-  SDOperand Bit1 = N->getOperand(1);
-  assert(isa<ConstantSDNode>(Bit0) && isa<ConstantSDNode>(Bit1) &&
-         "Invalid VECTOR_SHUFFLE mask!");
-  return (cast<ConstantSDNode>(Bit0)->getValue() == 0 &&
-          cast<ConstantSDNode>(Bit1)->getValue() == 2);
-}
-
 /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
 /// specifies a shuffle of elements that is suitable for input to MOVHLPS.
 bool X86::isMOVHLPSMask(SDNode *N) {
@@ -1477,7 +1459,7 @@ bool X86::isMOVHLPSMask(SDNode *N) {
   if (N->getNumOperands() != 2)
     return false;
 
-  // Expect bit 0 == 0, bit1 == 3
+  // Expect bit 0 == 1, bit1 == 3
   SDOperand Bit0 = N->getOperand(0);
   SDOperand Bit1 = N->getOperand(1);
   assert(isa<ConstantSDNode>(Bit0) && isa<ConstantSDNode>(Bit1) &&
@@ -1486,23 +1468,6 @@ bool X86::isMOVHLPSMask(SDNode *N) {
          "Invalid VECTOR_SHUFFLE mask!");
   return (cast<ConstantSDNode>(Bit0)->getValue() == 1 &&
           cast<ConstantSDNode>(Bit1)->getValue() == 3);
 }
 
-/// isUNPCKHPDMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to UNPCKHPD.
-bool X86::isUNPCKHPDMask(SDNode *N) {
-  assert(N->getOpcode() == ISD::BUILD_VECTOR);
-
-  if (N->getNumOperands() != 2)
-    return false;
-
-  // Expect bit 0 == 1, bit1 == 3
-  SDOperand Bit0 = N->getOperand(0);
-  SDOperand Bit1 = N->getOperand(1);
-  assert(isa<ConstantSDNode>(Bit0) && isa<ConstantSDNode>(Bit1) &&
-         "Invalid VECTOR_SHUFFLE mask!");
-  return (cast<ConstantSDNode>(Bit0)->getValue() == 1 &&
-          cast<ConstantSDNode>(Bit1)->getValue() == 3);
-}
-
 /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
 /// specifies a shuffle of elements that is suitable for input to UNPCKL.
 bool X86::isUNPCKLMask(SDNode *N) {
@@ -1526,6 +1491,29 @@
   return true;
 }
 
+/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to UNPCKH.
+bool X86::isUNPCKHMask(SDNode *N) {
+  assert(N->getOpcode() == ISD::BUILD_VECTOR);
+
+  unsigned NumElems = N->getNumOperands();
+  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
+    return false;
+
+  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
+    SDOperand BitI  = N->getOperand(i);
+    SDOperand BitI1 = N->getOperand(i+1);
+    assert(isa<ConstantSDNode>(BitI) && isa<ConstantSDNode>(BitI1) &&
+           "Invalid VECTOR_SHUFFLE mask!");
+    if (cast<ConstantSDNode>(BitI)->getValue() != j + NumElems/2)
+      return false;
+    if (cast<ConstantSDNode>(BitI1)->getValue() != j + NumElems/2 + NumElems)
+      return false;
+  }
+
+  return true;
+}
+
 /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
 /// a splat of a single element.
 bool X86::isSplatMask(SDNode *N) {
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 132c2ac81ce..0654e7e1acc 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -188,23 +188,18 @@ namespace llvm {
     /// specifies a shuffle of elements that is suitable for input to SHUFP*.
     bool isSHUFPMask(SDNode *N);
 
-    /// isMOVLHPSorUNPCKLPDMask - Return true if the specified VECTOR_SHUFFLE
-    /// operand specifies a shuffle of elements that is suitable for input to
-    /// MOVLHPS or UNPCKLPD.
-    bool isMOVLHPSorUNPCKLPDMask(SDNode *N);
-
     /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
     /// specifies a shuffle of elements that is suitable for input to MOVHLPS.
     bool isMOVHLPSMask(SDNode *N);
 
-    /// isUNPCKHPDMask - Return true if the specified VECTOR_SHUFFLE operand
-    /// specifies a shuffle of elements that is suitable for input to UNPCKHPD.
-    bool isUNPCKHPDMask(SDNode *N);
-
     /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
     /// specifies a shuffle of elements that is suitable for input to UNPCKL.
     bool isUNPCKLMask(SDNode *N);
 
+    /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
+    /// specifies a shuffle of elements that is suitable for input to UNPCKH.
+    bool isUNPCKHMask(SDNode *N);
+
     /// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand
     /// specifies a splat of a single element.
     bool isSplatMask(SDNode *N);
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index a94fe892c05..8dc3f46b935 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -63,22 +63,18 @@ def MOVLHPS_splat_mask : PatLeaf<(build_vector), [{
   return X86::isSplatMask(N);
 }]>;
 
-def MOVLHPSorUNPCKLPD_shuffle_mask : PatLeaf<(build_vector), [{
-  return X86::isMOVLHPSorUNPCKLPDMask(N);
-}], SHUFFLE_get_shuf_imm>;
-
 def MOVHLPS_shuffle_mask : PatLeaf<(build_vector), [{
   return X86::isMOVHLPSMask(N);
-}], SHUFFLE_get_shuf_imm>;
-
-def UNPCKHPD_shuffle_mask : PatLeaf<(build_vector), [{
-  return X86::isUNPCKHPDMask(N);
-}], SHUFFLE_get_shuf_imm>;
+}]>;
 
 def UNPCKL_shuffle_mask : PatLeaf<(build_vector), [{
   return X86::isUNPCKLMask(N);
 }]>;
 
+def UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{
+  return X86::isUNPCKHMask(N);
+}]>;
+
 // Only use PSHUF if it is not a splat.
 def PSHUFD_shuffle_mask : PatLeaf<(build_vector), [{
   return !X86::isSplatMask(N) && X86::isPSHUFDMask(N);
@@ -172,7 +168,7 @@ def MOVSD128rr : SDI<0x10, MRMSrcReg, (ops VR128:$dst, FR64:$src),
 def MOVSD128rm : SDI<0x10, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
                      "movsd {$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
-                       (v4f32 (scalar_to_vector (loadf64 addr:$src))))]>;
+                       (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;
 
 // Conversion instructions
@@ -476,21 +472,34 @@ def MOVLPDrm : PDI<0x12, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
 def MOVLPDmr : PDI<0x13, MRMDestMem, (ops f64mem:$dst, VR128:$src),
                    "movlpd {$src, $dst|$dst, $src}", []>;
 
-def MOVHPSrm : PSI<0x16, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
-                   "movhps {$src, $dst|$dst, $src}", []>;
+let isTwoAddress = 1 in {
+def MOVHPSrm : PSI<0x16, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
+                   "movhps {$src2, $dst|$dst, $src2}", []>;
+def MOVHPDrm : PDI<0x16, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
+                   "movhpd {$src2, $dst|$dst, $src2}",
+                   [(set VR128:$dst,
+                     (v2f64 (vector_shuffle VR128:$src1,
+                             (scalar_to_vector (loadf64 addr:$src2)),
+                             UNPCKL_shuffle_mask)))]>;
+}
+
 def MOVHPSmr : PSI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
                    "movhps {$src, $dst|$dst, $src}", []>;
-def MOVHPDrm : PDI<0x16, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
-                   "movhpd {$src, $dst|$dst, $src}", []>;
 def MOVHPDmr : PDI<0x17, MRMDestMem, (ops f64mem:$dst, VR128:$src),
                    "movhpd {$src, $dst|$dst, $src}", []>;
 
 let isTwoAddress = 1 in {
 def MOVLHPSrr : PSI<0x16, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
-                    "movlhps {$src2, $dst|$dst, $src2}", []>;
+                    "movlhps {$src2, $dst|$dst, $src2}",
+                    [(set VR128:$dst,
+                      (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
+                              UNPCKL_shuffle_mask)))]>;
 def MOVHLPSrr : PSI<0x12, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
-                    "movlhps {$src2, $dst|$dst, $src2}", []>;
+                    "movhlps {$src2, $dst|$dst, $src2}",
+                    [(set VR128:$dst,
+                      (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
+                              MOVHLPS_shuffle_mask)))]>;
 }
 
 def MOVMSKPSrr : PSI<0x50, MRMSrcReg, (ops R32:$dst, VR128:$src),
@@ -784,16 +793,29 @@ def SHUFPDrm : PDIi8<0xC6, MRMSrcMem,
 
 def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
-                     "unpckhps {$src2, $dst|$dst, $src2}", []>;
+                     "unpckhps {$src2, $dst|$dst, $src2}",
+                     [(set VR128:$dst,
+                       (v4f32 (vector_shuffle VR128:$src1, VR128:$src2,
+                               UNPCKH_shuffle_mask)))]>;
 def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
                      (ops VR128:$dst, VR128:$src1, f128mem:$src2),
-                     "unpckhps {$src2, $dst|$dst, $src2}", []>;
+                     "unpckhps {$src2, $dst|$dst, $src2}",
+                     [(set VR128:$dst,
+                       (v4f32 (vector_shuffle VR128:$src1, (load addr:$src2),
+                               UNPCKH_shuffle_mask)))]>;
 def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
-                     "unpckhpd {$src2, $dst|$dst, $src2}", []>;
+                     "unpckhpd {$src2, $dst|$dst, $src2}",
+                     [(set VR128:$dst,
+                       (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
+                               UNPCKH_shuffle_mask)))]>;
 def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
                      (ops VR128:$dst, VR128:$src1, f128mem:$src2),
-                     "unpckhpd {$src2, $dst|$dst, $src2}", []>;
+                     "unpckhpd {$src2, $dst|$dst, $src2}",
+                     [(set VR128:$dst,
+                       (v2f64 (vector_shuffle VR128:$src1, (load addr:$src2),
+                               UNPCKH_shuffle_mask)))]>;
+
 def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
                      "unpcklps {$src2, $dst|$dst, $src2}",
@@ -808,10 +830,16 @@ def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
                               UNPCKL_shuffle_mask)))]>;
 def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
                      (ops VR128:$dst, VR128:$src1, VR128:$src2),
-                     "unpcklpd {$src2, $dst|$dst, $src2}", []>;
+                     "unpcklpd {$src2, $dst|$dst, $src2}",
+                     [(set VR128:$dst,
+                       (v2f64 (vector_shuffle VR128:$src1, VR128:$src2,
+                               UNPCKL_shuffle_mask)))]>;
 def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
                      (ops VR128:$dst, VR128:$src1, f128mem:$src2),
-                     "unpcklpd {$src2, $dst|$dst, $src2}", []>;
+                     "unpcklpd {$src2, $dst|$dst, $src2}",
+                     [(set VR128:$dst,
+                       (v2f64 (vector_shuffle VR128:$src1, (load addr:$src2),
+                               UNPCKL_shuffle_mask)))]>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -940,35 +968,65 @@ def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
                                UNPCKL_shuffle_mask)))]>;
 def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
                        (ops VR128:$dst, VR128:$src1, VR128:$src2),
-                       "punpcklqdq {$src2, $dst|$dst, $src2}", []>;
+                       "punpcklqdq {$src2, $dst|$dst, $src2}",
+                       [(set VR128:$dst,
+                         (v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
+                                 UNPCKL_shuffle_mask)))]>;
 def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
                        (ops VR128:$dst, VR128:$src1, i128mem:$src2),
-                       "punpcklqdq {$src2, $dst|$dst, $src2}", []>;
+                       "punpcklqdq {$src2, $dst|$dst, $src2}",
+                       [(set VR128:$dst,
+                         (v2i64 (vector_shuffle VR128:$src1, (load addr:$src2),
+                                 UNPCKL_shuffle_mask)))]>;
 def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
                       (ops VR128:$dst, VR128:$src1, VR128:$src2),
-                      "punpckhbw {$src2, $dst|$dst, $src2}", []>;
+                      "punpckhbw {$src2, $dst|$dst, $src2}",
+                      [(set VR128:$dst,
+                        (v16i8 (vector_shuffle VR128:$src1, VR128:$src2,
+                                UNPCKH_shuffle_mask)))]>;
 def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
                       (ops VR128:$dst, VR128:$src1, i128mem:$src2),
-                      "punpckhbw {$src2, $dst|$dst, $src2}", []>;
+                      "punpckhbw {$src2, $dst|$dst, $src2}",
+                      [(set VR128:$dst,
+                        (v16i8 (vector_shuffle VR128:$src1, (load addr:$src2),
+                                UNPCKH_shuffle_mask)))]>;
 def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
                       (ops VR128:$dst, VR128:$src1, VR128:$src2),
-                      "punpckhwd {$src2, $dst|$dst, $src2}", []>;
+                      "punpckhwd {$src2, $dst|$dst, $src2}",
+                      [(set VR128:$dst,
+                        (v8i16 (vector_shuffle VR128:$src1, VR128:$src2,
+                                UNPCKH_shuffle_mask)))]>;
 def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
                       (ops VR128:$dst, VR128:$src1, i128mem:$src2),
-                      "punpckhwd {$src2, $dst|$dst, $src2}", []>;
+                      "punpckhwd {$src2, $dst|$dst, $src2}",
+                      [(set VR128:$dst,
+                        (v8i16 (vector_shuffle VR128:$src1, (load addr:$src2),
+                                UNPCKH_shuffle_mask)))]>;
 def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
                       (ops VR128:$dst, VR128:$src1, VR128:$src2),
-                      "punpckhdq {$src2, $dst|$dst, $src2}", []>;
+                      "punpckhdq {$src2, $dst|$dst, $src2}",
+                      [(set VR128:$dst,
+                        (v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
+                                UNPCKH_shuffle_mask)))]>;
 def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
                       (ops VR128:$dst, VR128:$src1, i128mem:$src2),
-                      "punpckhdq {$src2, $dst|$dst, $src2}", []>;
+                      "punpckhdq {$src2, $dst|$dst, $src2}",
+                      [(set VR128:$dst,
+                        (v4i32 (vector_shuffle VR128:$src1, (load addr:$src2),
+                                UNPCKH_shuffle_mask)))]>;
 def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
                        (ops VR128:$dst, VR128:$src1, VR128:$src2),
-                       "punpckhdq {$src2, $dst|$dst, $src2}", []>;
+                       "punpckhqdq {$src2, $dst|$dst, $src2}",
+                       [(set VR128:$dst,
+                         (v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
+                                 UNPCKH_shuffle_mask)))]>;
 def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
                        (ops VR128:$dst, VR128:$src1, i128mem:$src2),
-                       "punpckhqdq {$src2, $dst|$dst, $src2}", []>;
+                       "punpckhqdq {$src2, $dst|$dst, $src2}",
+                       [(set VR128:$dst,
+                         (v2i64 (vector_shuffle VR128:$src1, (load addr:$src2),
+                                 UNPCKH_shuffle_mask)))]>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -1147,29 +1205,16 @@ def : Pat<(vector_shuffle (v4i32 VR128:$src), (undef),
           PSHUFD_shuffle_mask:$sm),
          (v4i32 (PSHUFDrr
VR128:$src, PSHUFD_shuffle_mask:$sm))>, Requires<[HasSSE2]>; -// Shuffle v2f64 / v2i64 -def : Pat<(vector_shuffle (v2f64 VR128:$src1), (v2f64 VR128:$src2), - MOVLHPSorUNPCKLPD_shuffle_mask:$sm), - (v2f64 (MOVLHPSrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>; -def : Pat<(vector_shuffle (v2f64 VR128:$src1), (v2f64 VR128:$src2), - MOVHLPS_shuffle_mask:$sm), - (v2f64 (MOVHLPSrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>; -def : Pat<(vector_shuffle (v2f64 VR128:$src1), (v2f64 VR128:$src2), - UNPCKHPD_shuffle_mask:$sm), - (v2f64 (UNPCKHPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>; -def : Pat<(vector_shuffle (v2f64 VR128:$src1), (loadv2f64 addr:$src2), - MOVLHPSorUNPCKLPD_shuffle_mask:$sm), - (v2f64 (UNPCKLPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>; - +// Shuffle v2i64 def : Pat<(vector_shuffle (v2i64 VR128:$src1), (v2i64 VR128:$src2), - MOVLHPSorUNPCKLPD_shuffle_mask:$sm), + UNPCKL_shuffle_mask:$sm), (v2i64 (MOVLHPSrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>; def : Pat<(vector_shuffle (v2i64 VR128:$src1), (v2i64 VR128:$src2), MOVHLPS_shuffle_mask:$sm), (v2i64 (MOVHLPSrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>; -def : Pat<(vector_shuffle (v2i64 VR128:$src1), (v2i64 VR128:$src2), - UNPCKHPD_shuffle_mask:$sm), - (v2i64 (UNPCKHPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>; -def : Pat<(vector_shuffle (v2i64 VR128:$src1), (loadv2i64 addr:$src2), - MOVLHPSorUNPCKLPD_shuffle_mask:$sm), +def : Pat<(vector_shuffle (v2i64 VR128:$src1), (load addr:$src2), + UNPCKL_shuffle_mask:$sm), (v2i64 (UNPCKLPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>; +def : Pat<(vector_shuffle (v2i64 VR128:$src1), (load addr:$src2), + UNPCKH_shuffle_mask:$sm), + (v2i64 (UNPCKHPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
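For illustration only, not part of the patch: a minimal C sketch of the operations the new patterns are meant to select, using the SSE2 intrinsics from <emmintrin.h>. The helper names load_high and unpack_high are made up for this example; only the intrinsics are real. _mm_loadh_pd corresponds to the new MOVHPDrm pattern (load an m64 into the upper half of an XMM register while keeping the lower half, i.e. an UNPCKL-style shuffle of $src1 with a scalar_to_vector load), and _mm_unpackhi_pd corresponds to the UNPCKHPD pattern guarded by the new isUNPCKHMask predicate, whose expected v2f64 mask is <1, 3>.

#include <emmintrin.h>

/* movhpd: p[0] goes to the upper half of the result, v's lower half is kept. */
__m128d load_high(__m128d v, const double *p) {
  return _mm_loadh_pd(v, p);
}

/* unpckhpd: result is <a[1], b[1]>, i.e. the v2f64 UNPCKH mask <1, 3>. */
__m128d unpack_high(__m128d a, __m128d b) {
  return _mm_unpackhi_pd(a, b);
}

With SSE2 code generation these are expected to lower to movhpd and unpckhpd respectively.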