From 2a2263e744130ccc7f73c88021bca4cc037eb35e Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Mon, 29 Oct 2012 17:57:12 +0000 Subject: [PATCH] Fix PR14204 - Add missing pattern on X86ISD::VZEXT from VR256 to VR256 when AVX2 is enabled. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@166947 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 15 +++++++++++++++ test/CodeGen/X86/pr14204.ll | 15 +++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 test/CodeGen/X86/pr14204.ll diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index e9c7f3e7f1c..3fcc0dc4149 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -5850,6 +5850,21 @@ let Predicates = [HasAVX2] in { def : Pat<(v4i64 (X86vzext (v8i16 VR128:$src))), (VPMOVZXWQYrr VR128:$src)>; def : Pat<(v4i64 (X86vzext (v4i32 VR128:$src))), (VPMOVZXDQYrr VR128:$src)>; + + def : Pat<(v16i16 (X86vzext (v32i8 VR256:$src))), + (VPMOVZXBWYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + def : Pat<(v8i32 (X86vzext (v32i8 VR256:$src))), + (VPMOVZXBDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + def : Pat<(v4i64 (X86vzext (v32i8 VR256:$src))), + (VPMOVZXBQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + + def : Pat<(v8i32 (X86vzext (v16i16 VR256:$src))), + (VPMOVZXWDYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + def : Pat<(v4i64 (X86vzext (v16i16 VR256:$src))), + (VPMOVZXWQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; + + def : Pat<(v4i64 (X86vzext (v8i32 VR256:$src))), + (VPMOVZXDQYrr (EXTRACT_SUBREG VR256:$src, sub_xmm))>; } let Predicates = [HasAVX] in { diff --git a/test/CodeGen/X86/pr14204.ll b/test/CodeGen/X86/pr14204.ll new file mode 100644 index 00000000000..42e362bf3b9 --- /dev/null +++ b/test/CodeGen/X86/pr14204.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=x86_64-linux-pc -mcpu=core-avx2 | FileCheck %s + +; FIXME: vpmovsxwd should be generated instead of vpmovzxwd followed by +; SLL/SRA. + +define <8 x i32> @foo(<8 x i1> %bar) nounwind readnone { +entry: + %s = sext <8 x i1> %bar to <8 x i32> + ret <8 x i32> %s +; CHECK: foo +; CHECK: vpmovzxwd +; CHECK: vpslld +; CHECK: vpsrad +; CHECK: ret +}