From c0b7f693cedcdf0f02208ab4546414645b3084ea Mon Sep 17 00:00:00 2001 From: Pawel Bylica Date: Wed, 6 May 2015 10:19:14 +0000 Subject: [PATCH] SelectionDAG: Handle out-of-bounds index in extract vector element Summary: This patch correctly handles the undef case of an EXTRACT_VECTOR_ELT node where the element index is constant and not less than the vector size. Test Plan: A CodeGen test for X86 is included. One incorrect regression test is also fixed. Reviewers: qcolombet, chandlerc, hfinkel Reviewed By: hfinkel Subscribers: hfinkel, llvm-commits Differential Revision: http://reviews.llvm.org/D9250 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@236584 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 ++ test/CodeGen/X86/extract-store.ll | 30 +++++++++++-- test/CodeGen/X86/extractelement-index.ll | 51 +++++++++++++++++++++++ 3 files changed, 82 insertions(+), 3 deletions(-) create mode 100644 test/CodeGen/X86/extractelement-index.ll diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 3b22e9715d1..d9a85369324 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3408,6 +3408,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, if (N1.getOpcode() == ISD::UNDEF) return getUNDEF(VT); + // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF + if (N2C && N2C->getZExtValue() >= N1.getValueType().getVectorNumElements()) + return getUNDEF(VT); + // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is // expanding copies of large vectors from registers. 
if (N2C && diff --git a/test/CodeGen/X86/extract-store.ll b/test/CodeGen/X86/extract-store.ll index 27d93804ba6..801ecc9ada6 100644 --- a/test/CodeGen/X86/extract-store.ll +++ b/test/CodeGen/X86/extract-store.ll @@ -1,7 +1,8 @@ ; RUN: llc < %s -o - -mcpu=generic -march=x86-64 -mattr=+sse4.1 | FileCheck %s -check-prefix=SSE41 ; RUN: llc < %s -o - -mcpu=generic -march=x86-64 -mattr=+avx | FileCheck %s -check-prefix=AVX -define void @pextrb(i8* nocapture %dst, <16 x i8> %foo) { +; CHECK-LABEL: extract_i8 +define void @extract_i8(i8* nocapture %dst, <16 x i8> %foo) { ; AVX: vpextrb ; SSE41: pextrb ; AVX-NOT: movb @@ -11,12 +12,35 @@ define void @pextrb(i8* nocapture %dst, <16 x i8> %foo) { ret void } -define void @pextrw(i16* nocapture %dst, <8 x i16> %foo) { +; CHECK-LABEL: extract_i16 +define void @extract_i16(i16* nocapture %dst, <8 x i16> %foo) { ; AVX: vpextrw ; SSE41: pextrw ; AVX-NOT: movw ; SSE41-NOT: movw - %vecext = extractelement <8 x i16> %foo, i32 15 + %vecext = extractelement <8 x i16> %foo, i32 7 + store i16 %vecext, i16* %dst, align 1 + ret void +} + +; CHECK-LABEL: extract_i8_undef +define void @extract_i8_undef(i8* nocapture %dst, <16 x i8> %foo) { +; AVX-NOT: vpextrb +; SSE41-NOT: pextrb +; AVX-NOT: movb +; SSE41-NOT: movb + %vecext = extractelement <16 x i8> %foo, i32 16 ; undef + store i8 %vecext, i8* %dst, align 1 + ret void +} + +; CHECK-LABEL: extract_i16_undef +define void @extract_i16_undef(i16* nocapture %dst, <8 x i16> %foo) { +; AVX-NOT: vpextrw +; SSE41-NOT: pextrw +; AVX-NOT: movw +; SSE41-NOT: movw + %vecext = extractelement <8 x i16> %foo, i32 9 ; undef store i16 %vecext, i16* %dst, align 1 ret void } diff --git a/test/CodeGen/X86/extractelement-index.ll b/test/CodeGen/X86/extractelement-index.ll new file mode 100644 index 00000000000..66d8767fa2f --- /dev/null +++ b/test/CodeGen/X86/extractelement-index.ll @@ -0,0 +1,51 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -march=x86-64 -mattr=+avx | 
FileCheck %s -check-prefix=CHECK -check-prefix=AVX + + +; CHECK-LABEL: extractelement_index_1: +define i8 @extractelement_index_1(<32 x i8> %a) nounwind { + ; X64: movaps + ; AVX: vpextrb $1 + %b = extractelement <32 x i8> %a, i256 1 + ret i8 %b +} + +; CHECK-LABEL: extractelement_index_2: +define i32 @extractelement_index_2(<8 x i32> %a) nounwind { + ; X64: pshufd + ; AVX: vextractf128 $1 + ; AVX-NEXT: vpextrd $3 + %b = extractelement <8 x i32> %a, i64 7 + ret i32 %b +} + +; CHECK-LABEL: extractelement_index_3: +define i32 @extractelement_index_3(<8 x i32> %a) nounwind { + ; CHECK-NOT: pextr + %b = extractelement <8 x i32> %a, i64 15 ; out-of-bounds index + ret i32 %b +} + +; CHECK-LABEL: extractelement_index_4: +define i32 @extractelement_index_4(<8 x i32> %a) nounwind { + ; X64: movd + ; AVX: vextractf128 $1 + ; AVX-NEXT: vmovd + %b = extractelement <8 x i32> %a, i256 4 + ret i32 %b +} + +; CHECK-LABEL: extractelement_index_5: +define i8 @extractelement_index_5(<32 x i8> %a, i256 %i) nounwind { + ; X64: movaps + ; AVX: vmovaps + %b = extractelement <32 x i8> %a, i256 %i + ret i8 %b +} + +; CHECK-LABEL: extractelement_index_6: +define i8 @extractelement_index_6(<32 x i8> %a) nounwind { + ; CHECK-NOT: pextr + %b = extractelement <32 x i8> %a, i256 -1 ; out-of-bounds index + ret i8 %b +} \ No newline at end of file