Generate Neon VTBL and VTBX instructions from the corresponding intrinsics.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@78835 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Bob Wilson 2009-08-12 20:51:55 +00:00
parent a6340628c2
commit 114a266c94
3 changed files with 192 additions and 0 deletions

View File

@ -1989,6 +1989,59 @@ def VZIPq8 : N2VQShuffle<0b00, 0b00011, "vzip.8">;
def VZIPq16 : N2VQShuffle<0b01, 0b00011, "vzip.16">;
def VZIPq32 : N2VQShuffle<0b10, 0b00011, "vzip.32">;
// Vector Table Lookup and Table Extension.
// VTBL : Vector Table Lookup
def VTBL1
: N3V<1,1,0b11,0b1000,0,0, (outs DPR:$dst),
(ins DPR:$tbl1, DPR:$src), NoItinerary,
"vtbl.8\t$dst, \\{$tbl1\\}, $src", "",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbl1 DPR:$tbl1, DPR:$src)))]>;
def VTBL2
: N3V<1,1,0b11,0b1001,0,0, (outs DPR:$dst),
(ins DPR:$tbl1, DPR:$tbl2, DPR:$src), NoItinerary,
"vtbl.8\t$dst, \\{$tbl1,$tbl2\\}, $src", "",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbl2
DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>;
def VTBL3
: N3V<1,1,0b11,0b1010,0,0, (outs DPR:$dst),
(ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), NoItinerary,
"vtbl.8\t$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbl3
DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>;
def VTBL4
: N3V<1,1,0b11,0b1011,0,0, (outs DPR:$dst),
(ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), NoItinerary,
"vtbl.8\t$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbl4 DPR:$tbl1, DPR:$tbl2,
DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>;
// VTBX : Vector Table Extension
def VTBX1
: N3V<1,1,0b11,0b1000,1,0, (outs DPR:$dst),
(ins DPR:$orig, DPR:$tbl1, DPR:$src), NoItinerary,
"vtbx.8\t$dst, \\{$tbl1\\}, $src", "$orig = $dst",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbx1
DPR:$orig, DPR:$tbl1, DPR:$src)))]>;
def VTBX2
: N3V<1,1,0b11,0b1001,1,0, (outs DPR:$dst),
(ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src), NoItinerary,
"vtbx.8\t$dst, \\{$tbl1,$tbl2\\}, $src", "$orig = $dst",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbx2
DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>;
def VTBX3
: N3V<1,1,0b11,0b1010,1,0, (outs DPR:$dst),
(ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), NoItinerary,
"vtbx.8\t$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "$orig = $dst",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbx3 DPR:$orig, DPR:$tbl1,
DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>;
def VTBX4
: N3V<1,1,0b11,0b1011,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1,
DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), NoItinerary,
"vtbx.8\t$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "$orig = $dst",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbx4 DPR:$orig, DPR:$tbl1,
DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>;
//===----------------------------------------------------------------------===//
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//

View File

@ -83,6 +83,36 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd,
FirstOpnd = 3;
NumRegs = 4;
return true;
case ARM::VTBL2:
FirstOpnd = 1;
NumRegs = 2;
return true;
case ARM::VTBL3:
FirstOpnd = 1;
NumRegs = 3;
return true;
case ARM::VTBL4:
FirstOpnd = 1;
NumRegs = 4;
return true;
case ARM::VTBX2:
FirstOpnd = 2;
NumRegs = 2;
return true;
case ARM::VTBX3:
FirstOpnd = 2;
NumRegs = 3;
return true;
case ARM::VTBX4:
FirstOpnd = 2;
NumRegs = 4;
return true;
}
return false;

109
test/CodeGen/ARM/vtbl.ll Normal file
View File

@ -0,0 +1,109 @@
; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | FileCheck %s
%struct.__builtin_neon_v8qi2 = type { <8 x i8>, <8 x i8> }
%struct.__builtin_neon_v8qi3 = type { <8 x i8>, <8 x i8>, <8 x i8> }
%struct.__builtin_neon_v8qi4 = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
define <8 x i8> @vtbl1(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vtbl1:
;CHECK: vtbl.8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %tmp1, <8 x i8> %tmp2)
ret <8 x i8> %tmp3
}
define <8 x i8> @vtbl2(<8 x i8>* %A, %struct.__builtin_neon_v8qi2* %B) nounwind {
;CHECK: vtbl2:
;CHECK: vtbl.8
%tmp1 = load <8 x i8>* %A
%tmp2 = load %struct.__builtin_neon_v8qi2* %B
%tmp3 = extractvalue %struct.__builtin_neon_v8qi2 %tmp2, 0
%tmp4 = extractvalue %struct.__builtin_neon_v8qi2 %tmp2, 1
%tmp5 = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4)
ret <8 x i8> %tmp5
}
define <8 x i8> @vtbl3(<8 x i8>* %A, %struct.__builtin_neon_v8qi3* %B) nounwind {
;CHECK: vtbl3:
;CHECK: vtbl.8
%tmp1 = load <8 x i8>* %A
%tmp2 = load %struct.__builtin_neon_v8qi3* %B
%tmp3 = extractvalue %struct.__builtin_neon_v8qi3 %tmp2, 0
%tmp4 = extractvalue %struct.__builtin_neon_v8qi3 %tmp2, 1
%tmp5 = extractvalue %struct.__builtin_neon_v8qi3 %tmp2, 2
%tmp6 = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5)
ret <8 x i8> %tmp6
}
define <8 x i8> @vtbl4(<8 x i8>* %A, %struct.__builtin_neon_v8qi4* %B) nounwind {
;CHECK: vtbl4:
;CHECK: vtbl.8
%tmp1 = load <8 x i8>* %A
%tmp2 = load %struct.__builtin_neon_v8qi4* %B
%tmp3 = extractvalue %struct.__builtin_neon_v8qi4 %tmp2, 0
%tmp4 = extractvalue %struct.__builtin_neon_v8qi4 %tmp2, 1
%tmp5 = extractvalue %struct.__builtin_neon_v8qi4 %tmp2, 2
%tmp6 = extractvalue %struct.__builtin_neon_v8qi4 %tmp2, 3
%tmp7 = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6)
ret <8 x i8> %tmp7
}
define <8 x i8> @vtbx1(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
;CHECK: vtbx1:
;CHECK: vtbx.8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = load <8 x i8>* %C
%tmp4 = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
ret <8 x i8> %tmp4
}
define <8 x i8> @vtbx2(<8 x i8>* %A, %struct.__builtin_neon_v8qi2* %B, <8 x i8>* %C) nounwind {
;CHECK: vtbx2:
;CHECK: vtbx.8
%tmp1 = load <8 x i8>* %A
%tmp2 = load %struct.__builtin_neon_v8qi2* %B
%tmp3 = extractvalue %struct.__builtin_neon_v8qi2 %tmp2, 0
%tmp4 = extractvalue %struct.__builtin_neon_v8qi2 %tmp2, 1
%tmp5 = load <8 x i8>* %C
%tmp6 = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5)
ret <8 x i8> %tmp6
}
define <8 x i8> @vtbx3(<8 x i8>* %A, %struct.__builtin_neon_v8qi3* %B, <8 x i8>* %C) nounwind {
;CHECK: vtbx3:
;CHECK: vtbx.8
%tmp1 = load <8 x i8>* %A
%tmp2 = load %struct.__builtin_neon_v8qi3* %B
%tmp3 = extractvalue %struct.__builtin_neon_v8qi3 %tmp2, 0
%tmp4 = extractvalue %struct.__builtin_neon_v8qi3 %tmp2, 1
%tmp5 = extractvalue %struct.__builtin_neon_v8qi3 %tmp2, 2
%tmp6 = load <8 x i8>* %C
%tmp7 = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6)
ret <8 x i8> %tmp7
}
define <8 x i8> @vtbx4(<8 x i8>* %A, %struct.__builtin_neon_v8qi4* %B, <8 x i8>* %C) nounwind {
;CHECK: vtbx4:
;CHECK: vtbx.8
%tmp1 = load <8 x i8>* %A
%tmp2 = load %struct.__builtin_neon_v8qi4* %B
%tmp3 = extractvalue %struct.__builtin_neon_v8qi4 %tmp2, 0
%tmp4 = extractvalue %struct.__builtin_neon_v8qi4 %tmp2, 1
%tmp5 = extractvalue %struct.__builtin_neon_v8qi4 %tmp2, 2
%tmp6 = extractvalue %struct.__builtin_neon_v8qi4 %tmp2, 3
%tmp7 = load <8 x i8>* %C
%tmp8 = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6, <8 x i8> %tmp7)
ret <8 x i8> %tmp8
}
declare <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8>, <8 x i8>) nounwind readnone
declare <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
declare <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
declare <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
declare <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
declare <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
declare <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
declare <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone