diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 7c9a9f7e8c5..0d6ff38a91d 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -301,6 +301,7 @@ def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>; // 256-bit bitconvert pattern fragments def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>; +def bc_v4i64 : PatFrag<(ops node:$in), (v4i64 (bitconvert node:$in))>; def vzmovl_v2i64 : PatFrag<(ops node:$src), (bitconvert (v2i64 (X86vzmovl diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 6d22f2b60ed..93902d171ab 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -1522,20 +1522,35 @@ multiclass sse12_fp_packed_logical opc, string OpcodeStr, /// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms /// -multiclass sse12_fp_packed_logical_y opc, string OpcodeStr> { +multiclass sse12_fp_packed_logical_y opc, string OpcodeStr, + SDNode OpNode, int HasNoPat = 0> { defm PSY : sse12_fp_packed_logical_rm, VEX_4V; + !strconcat(OpcodeStr, "ps"), f256mem, + !if(HasNoPat, [], // rr + [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, + VR256:$src2)))]), + !if(HasNoPat, [], // rm + [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)), + (memopv4i64 addr:$src2)))]), 0>, VEX_4V; defm PDY : sse12_fp_packed_logical_rm, OpSize, VEX_4V; + !strconcat(OpcodeStr, "pd"), f256mem, + !if(HasNoPat, [], // rr + [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)), + (bc_v4i64 (v4f64 VR256:$src2))))]), + !if(HasNoPat, [], // rm + [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)), + (memopv4i64 addr:$src2)))]), 0>, + OpSize, VEX_4V; } // AVX 256-bit packed logical ops forms -defm VAND : sse12_fp_packed_logical_y<0x54, "and">; -defm VOR : sse12_fp_packed_logical_y<0x56, "or">; -defm VXOR : sse12_fp_packed_logical_y<0x57, "xor">; -let isCommutable = 0 in - defm VANDN : sse12_fp_packed_logical_y<0x55, "andn">; +defm VAND : sse12_fp_packed_logical_y<0x54, "and", and>; +defm VOR : sse12_fp_packed_logical_y<0x56, "or", or>; +defm VXOR : sse12_fp_packed_logical_y<0x57, "xor", xor>; +let isCommutable = 0 in { + defm VANDN : sse12_fp_packed_logical_y<0x55, "andn", undef /* dummy */, 1>; +} defm AND : sse12_fp_packed_logical<0x54, "and", and>; defm OR : sse12_fp_packed_logical<0x56, "or", or>; @@ -3660,6 +3675,9 @@ let Predicates = [HasXMMInt] in { let Predicates = [HasAVX] in { def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>; + def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>; + def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>; + def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>; } // Move scalar to XMM zero-extended diff --git a/test/CodeGen/X86/avx-256-logic.ll b/test/CodeGen/X86/avx-256-logic.ll new file mode 100644 index 00000000000..05e82895fec --- /dev/null +++ b/test/CodeGen/X86/avx-256-logic.ll @@ -0,0 +1,116 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s + +; CHECK: vandpd +define <4 x double> @andpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <4 x double> %x to <4 x i64> + %1 = bitcast <4 x double> %y to <4 x i64> + %and.i = and <4 x i64> %0, %1 + %2 = bitcast <4 x i64> %and.i to <4 x double> + ret <4 x double> %2 +} + +; CHECK: vandpd LCP{{.*}}(%rip) +define <4 x double> @andpd256fold(<4 x double> %y) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <4 x double> %y to <4 x i64> + %and.i = and <4 x i64> %0, + %1 = bitcast <4 x i64> %and.i to <4 x double> + ret <4 x double> %1 +} + +; CHECK: vandps +define <8 x float> @andps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <8 x float> %x to <8 x i32> + %1 = bitcast <8 x float> %y to <8 x i32> + %and.i = and <8 x i32> %0, %1 + %2 = bitcast <8 x i32> %and.i to <8 x float> + ret <8 x float> %2 +} + +; CHECK: vandps LCP{{.*}}(%rip) +define <8 x float> @andps256fold(<8 x float> %y) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <8 x float> %y to <8 x i32> + %and.i = and <8 x i32> %0, + %1 = bitcast <8 x i32> %and.i to <8 x float> + ret <8 x float> %1 +} + +; CHECK: vxorpd +define <4 x double> @xorpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <4 x double> %x to <4 x i64> + %1 = bitcast <4 x double> %y to <4 x i64> + %xor.i = xor <4 x i64> %0, %1 + %2 = bitcast <4 x i64> %xor.i to <4 x double> + ret <4 x double> %2 +} + +; CHECK: vxorpd LCP{{.*}}(%rip) +define <4 x double> @xorpd256fold(<4 x double> %y) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <4 x double> %y to <4 x i64> + %xor.i = xor <4 x i64> %0, + %1 = bitcast <4 x i64> %xor.i to <4 x double> + ret <4 x double> %1 +} + +; CHECK: vxorps +define <8 x float> @xorps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <8 x float> %x to <8 x i32> + %1 = bitcast <8 x float> %y to <8 x i32> + %xor.i = xor <8 x i32> %0, %1 + %2 = bitcast <8 x i32> %xor.i to <8 x float> + ret <8 x float> %2 +} + +; CHECK: vxorps LCP{{.*}}(%rip) +define <8 x float> @xorps256fold(<8 x float> %y) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <8 x float> %y to <8 x i32> + %xor.i = xor <8 x i32> %0, + %1 = bitcast <8 x i32> %xor.i to <8 x float> + ret <8 x float> %1 +} + +; CHECK: vorpd +define <4 x double> @orpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <4 x double> %x to <4 x i64> + %1 = bitcast <4 x double> %y to <4 x i64> + %or.i = or <4 x i64> %0, %1 + %2 = bitcast <4 x i64> %or.i to <4 x double> + ret <4 x double> %2 +} + +; CHECK: vorpd LCP{{.*}}(%rip) +define <4 x double> @orpd256fold(<4 x double> %y) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <4 x double> %y to <4 x i64> + %or.i = or <4 x i64> %0, + %1 = bitcast <4 x i64> %or.i to <4 x double> + ret <4 x double> %1 +} + +; CHECK: vorps +define <8 x float> @orps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <8 x float> %x to <8 x i32> + %1 = bitcast <8 x float> %y to <8 x i32> + %or.i = or <8 x i32> %0, %1 + %2 = bitcast <8 x i32> %or.i to <8 x float> + ret <8 x float> %2 +} + +; CHECK: vorps LCP{{.*}}(%rip) +define <8 x float> @orps256fold(<8 x float> %y) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <8 x float> %y to <8 x i32> + %or.i = or <8 x i32> %0, + %1 = bitcast <8 x i32> %or.i to <8 x float> + ret <8 x float> %1 +} +