From 6345249972b73a400e31c0618c4d67411a0387e6 Mon Sep 17 00:00:00 2001 From: Ana Pazos Date: Thu, 21 Nov 2013 08:16:15 +0000 Subject: [PATCH] Implemented Neon scalar vdup_lane intrinsics. Fixed scalar dup alias and added test case. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@195330 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64InstrNEON.td | 25 +++++++- test/CodeGen/AArch64/neon-scalar-copy.ll | 80 ++++++++++++++++++++++++ test/MC/AArch64/neon-scalar-dup.s | 26 ++++++++ 3 files changed, 129 insertions(+), 2 deletions(-) create mode 100644 test/CodeGen/AArch64/neon-scalar-copy.ll diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index 5b6168eb081..c0c572a62e7 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -5883,16 +5883,37 @@ defm : NeonI_SDUP; defm : NeonI_SDUP; defm : NeonI_SDUP; -// Patterns for vector extract of FP data using scalar DUP instructions +// Patterns for vector extract of FP data using scalar DUP instructions defm : NeonI_Scalar_DUP_Elt_pattern; defm : NeonI_Scalar_DUP_Elt_pattern; +multiclass NeonI_Scalar_DUP_Vec_pattern { + + def : Pat<(ResTy (extract_subvector (OpTy VPR128:$Rn), OpLImm:$Imm)), + (ResTy (DUPI VPR128:$Rn, OpLImm:$Imm))>; + + def : Pat<(ResTy (extract_subvector (NOpTy VPR64:$Rn), OpNImm:$Imm)), + (ResTy (DUPI + (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), + OpNImm:$Imm))>; +} +// Patterns for extract subvectors of v1ix data using scalar DUP instructions +defm : NeonI_Scalar_DUP_Vec_pattern; +defm : NeonI_Scalar_DUP_Vec_pattern; +defm : NeonI_Scalar_DUP_Vec_pattern; + + multiclass NeonI_Scalar_DUP_alias { - def : NeonInstAlias; } diff --git a/test/CodeGen/AArch64/neon-scalar-copy.ll b/test/CodeGen/AArch64/neon-scalar-copy.ll new file mode 100644 index 00000000000..59f62374d4f --- /dev/null +++ b/test/CodeGen/AArch64/neon-scalar-copy.ll @@ -0,0 +1,80 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu 
-mattr=+neon < %s | FileCheck %s + +define float @test_dup_sv2S(<2 x float> %v) { + ;CHECK: test_dup_sv2S + ;CHECK: dup {{s[0-9]+}}, {{v[0-9]+}}.s[1] + %tmp1 = extractelement <2 x float> %v, i32 1 + ret float %tmp1 +} + +define float @test_dup_sv4S(<4 x float> %v) { + ;CHECK: test_dup_sv4S + ;CHECK: dup {{s[0-9]+}}, {{v[0-9]+}}.s[0] + %tmp1 = extractelement <4 x float> %v, i32 0 + ret float %tmp1 +} + +define double @test_dup_dvD(<1 x double> %v) { + ;CHECK: test_dup_dvD + ;CHECK-NOT: dup {{d[0-9]+}}, {{v[0-9]+}}.d[0] + ;CHECK: ret + %tmp1 = extractelement <1 x double> %v, i32 0 + ret double %tmp1 +} + +define double @test_dup_dv2D(<2 x double> %v) { + ;CHECK: test_dup_dv2D + ;CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1] + %tmp1 = extractelement <2 x double> %v, i32 1 + ret double %tmp1 +} + +define <1 x i8> @test_vector_dup_bv16B(<16 x i8> %v1) { + ;CHECK: test_vector_dup_bv16B + ;CHECK: dup {{b[0-9]+}}, {{v[0-9]+}}.b[14] + %shuffle.i = shufflevector <16 x i8> %v1, <16 x i8> undef, <1 x i32> <i32 14> + ret <1 x i8> %shuffle.i +} + +define <1 x i8> @test_vector_dup_bv8B(<8 x i8> %v1) { + ;CHECK: test_vector_dup_bv8B + ;CHECK: dup {{b[0-9]+}}, {{v[0-9]+}}.b[7] + %shuffle.i = shufflevector <8 x i8> %v1, <8 x i8> undef, <1 x i32> <i32 7> + ret <1 x i8> %shuffle.i +} + +define <1 x i16> @test_vector_dup_hv8H(<8 x i16> %v1) { + ;CHECK: test_vector_dup_hv8H + ;CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[7] + %shuffle.i = shufflevector <8 x i16> %v1, <8 x i16> undef, <1 x i32> <i32 7> + ret <1 x i16> %shuffle.i +} + +define <1 x i16> @test_vector_dup_hv4H(<4 x i16> %v1) { + ;CHECK: test_vector_dup_hv4H + ;CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[3] + %shuffle.i = shufflevector <4 x i16> %v1, <4 x i16> undef, <1 x i32> <i32 3> + ret <1 x i16> %shuffle.i +} + +define <1 x i32> @test_vector_dup_sv4S(<4 x i32> %v1) { + ;CHECK: test_vector_dup_sv4S + ;CHECK: dup {{s[0-9]+}}, {{v[0-9]+}}.s[3] + %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <1 x i32> <i32 3> + ret <1 x i32> %shuffle +} + +define <1 x 
i32> @test_vector_dup_sv2S(<2 x i32> %v1) { + ;CHECK: test_vector_dup_sv2S + ;CHECK: dup {{s[0-9]+}}, {{v[0-9]+}}.s[1] + %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <1 x i32> <i32 1> + ret <1 x i32> %shuffle +} + +define <1 x i64> @test_vector_dup_dv2D(<2 x i64> %v1) { + ;CHECK: test_vector_dup_dv2D + ;CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1] + %shuffle.i = shufflevector <2 x i64> %v1, <2 x i64> undef, <1 x i32> <i32 1> + ret <1 x i64> %shuffle.i +} + diff --git a/test/MC/AArch64/neon-scalar-dup.s b/test/MC/AArch64/neon-scalar-dup.s index 64366f2edc0..77c638df095 100644 --- a/test/MC/AArch64/neon-scalar-dup.s +++ b/test/MC/AArch64/neon-scalar-dup.s @@ -27,3 +27,29 @@ // CHECK: dup d3, v5.d[0] // encoding: [0xa3,0x04,0x08,0x5e] // CHECK: dup d6, v5.d[1] // encoding: [0xa6,0x04,0x18,0x5e] +//------------------------------------------------------------------------------ +// Aliases for Duplicate element (scalar) +//------------------------------------------------------------------------------ + mov b0, v0.b[15] + mov b1, v0.b[7] + mov b17, v0.b[0] + mov h5, v31.h[7] + mov h9, v1.h[4] + mov h11, v17.h[0] + mov s2, v2.s[3] + mov s4, v21.s[0] + mov s31, v21.s[2] + mov d3, v5.d[0] + mov d6, v5.d[1] + +// CHECK: dup b0, v0.b[15] // encoding: [0x00,0x04,0x1f,0x5e] +// CHECK: dup b1, v0.b[7] // encoding: [0x01,0x04,0x0f,0x5e] +// CHECK: dup b17, v0.b[0] // encoding: [0x11,0x04,0x01,0x5e] +// CHECK: dup h5, v31.h[7] // encoding: [0xe5,0x07,0x1e,0x5e] +// CHECK: dup h9, v1.h[4] // encoding: [0x29,0x04,0x12,0x5e] +// CHECK: dup h11, v17.h[0] // encoding: [0x2b,0x06,0x02,0x5e] +// CHECK: dup s2, v2.s[3] // encoding: [0x42,0x04,0x1c,0x5e] +// CHECK: dup s4, v21.s[0] // encoding: [0xa4,0x06,0x04,0x5e] +// CHECK: dup s31, v21.s[2] // encoding: [0xbf,0x06,0x14,0x5e] +// CHECK: dup d3, v5.d[0] // encoding: [0xa3,0x04,0x08,0x5e] +// CHECK: dup d6, v5.d[1] // encoding: [0xa6,0x04,0x18,0x5e]