From cfe09ed28d8a65b671e8b7a716a933e98e810e32 Mon Sep 17 00:00:00 2001 From: Adhemerval Zanella Date: Mon, 5 Nov 2012 17:15:56 +0000 Subject: [PATCH] [PATCH] PowerPC: Expand load extend vector operations This patch expands the SEXTLOAD, ZEXTLOAD, and EXTLOAD operations for vector types when altivec is enabled. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167386 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelLowering.cpp | 10 ++ test/CodeGen/PowerPC/vec_extload.ll | 155 +++++++++++++++++++++++++ 2 files changed, 165 insertions(+) create mode 100644 test/CodeGen/PowerPC/vec_extload.ll diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 8d46e96e4d1..0922011d97c 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -361,6 +361,16 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::CTTZ, VT, Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); + + for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) { + MVT::SimpleValueType InnerVT = (MVT::SimpleValueType)j; + setTruncStoreAction(VT, InnerVT, Expand); + } + setLoadExtAction(ISD::SEXTLOAD, VT, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, Expand); } for (unsigned i = (unsigned)MVT::FIRST_FP_VECTOR_VALUETYPE; diff --git a/test/CodeGen/PowerPC/vec_extload.ll b/test/CodeGen/PowerPC/vec_extload.ll new file mode 100644 index 00000000000..201c15b9c73 --- /dev/null +++ b/test/CodeGen/PowerPC/vec_extload.ll @@ -0,0 +1,155 @@ +; RUN: llc -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s + +; Check vector extend load expansion with altivec enabled. 
+ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +; Altivec does not provide a sext instruction, so it expands to +; a set of vector stores (stvx), byte load/sign extend/store +; (lbz/stb), and a final vector load (lvx) to load the resulting +; extended vector. +define <16 x i8> @v16si8_sext_in_reg(<16 x i8> %a) { + %b = trunc <16 x i8> %a to <16 x i4> + %c = sext <16 x i4> %b to <16 x i8> + ret <16 x i8> %c +} +; CHECK: v16si8_sext_in_reg: +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lbz +; CHECK: stb +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lbz +; CHECK: stb +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lbz +; CHECK: stb +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lbz +; CHECK: stb +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lbz +; CHECK: stb +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lbz +; CHECK: stb +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lbz +; CHECK: stb +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lbz +; CHECK: stb +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lbz +; CHECK: stb +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lbz +; CHECK: stb +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lbz +; CHECK: stb +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lbz +; CHECK: stb +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lbz +; CHECK: stb +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lbz +; CHECK: stb +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lbz +; CHECK: stb +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lbz +; CHECK: stb +; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}} + +; The zero extend uses smarter logic: a vector splat +; and a logical AND to set the higher bits to 0. 
+define <16 x i8> @v16si8_zext_in_reg(<16 x i8> %a) { + %b = trunc <16 x i8> %a to <16 x i4> + %c = zext <16 x i4> %b to <16 x i8> + ret <16 x i8> %c +} +; CHECK: v16si8_zext_in_reg: +; CHECK: vspltisb [[VMASK:[0-9]+]], 15 +; CHECK-NEXT: vand 2, 2, [[VMASK]] + +; Same as v16si8_sext_in_reg, but expands to halfword loads/stores (lhz/sth). +define <8 x i16> @v8si16_sext_in_reg(<8 x i16> %a) { + %b = trunc <8 x i16> %a to <8 x i8> + %c = sext <8 x i8> %b to <8 x i16> + ret <8 x i16> %c +} +; CHECK: v8si16_sext_in_reg: +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lhz +; CHECK: sth +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lhz +; CHECK: sth +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lhz +; CHECK: sth +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lhz +; CHECK: sth +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lhz +; CHECK: sth +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lhz +; CHECK: sth +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lhz +; CHECK: sth +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lhz +; CHECK: sth +; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}} + +; Same as v16si8_zext_in_reg, but instead of creating the mask +; with a splat, loads it from memory. +define <8 x i16> @v8si16_zext_in_reg(<8 x i16> %a) { + %b = trunc <8 x i16> %a to <8 x i8> + %c = zext <8 x i8> %b to <8 x i16> + ret <8 x i16> %c +} +; CHECK: v8si16_zext_in_reg: +; CHECK: ld [[RMASKTOC:[0-9]+]], .LC{{[0-9]+}}@toc(2) +; CHECK-NEXT: lvx [[VMASK:[0-9]+]], {{[0-9]+}}, [[RMASKTOC]] +; CHECK-NEXT: vand 2, 2, [[VMASK]] + +; Same as v16si8_sext_in_reg, but expands to halfword loads (lha) and +; word stores (stw). 
+define <4 x i32> @v4si32_sext_in_reg(<4 x i32> %a) { + %b = trunc <4 x i32> %a to <4 x i16> + %c = sext <4 x i16> %b to <4 x i32> + ret <4 x i32> %c +} +; CHECK: v4si32_sext_in_reg: +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lha +; CHECK: stw +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lha +; CHECK: stw +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lha +; CHECK: stw +; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: lha +; CHECK: stw +; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}} + +; Same as v16si8_zext_in_reg, but builds the mask with a splat and a shift. +define <4 x i32> @v4si32_zext_in_reg(<4 x i32> %a) { + %b = trunc <4 x i32> %a to <4 x i16> + %c = zext <4 x i16> %b to <4 x i32> + ret <4 x i32> %c +} +; CHECK: v4si32_zext_in_reg: +; CHECK: vspltisw [[VMASK:[0-9]+]], -16 +; CHECK-NEXT: vsrw [[VMASK]], [[VMASK]], [[VMASK]] +; CHECK-NEXT: vand 2, 2, [[VMASK]]