Provide correct encodings for the get_lane and set_lane variants of vmov.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@117495 91177308-0d34-0410-b5e6-96231b3b80d8
commit d2fbdb7f5c (parent 3cede2d0b2)
@@ -1868,14 +1868,15 @@ class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
   let Pattern = pattern;
   list<Predicate> Predicates = [HasNEON];
 
-  bits<5> Vd;
-  bits<4> Rt;
+  bits<5> V;
+  bits<4> R;
   bits<4> p;
+  bits<4> lane;
 
   let Inst{31-28} = p{3-0};
-  let Inst{7} = Vd{4};
-  let Inst{19-16} = Vd{3-0};
-  let Inst{15-12} = Rt{3-0};
+  let Inst{7} = V{4};
+  let Inst{19-16} = V{3-0};
+  let Inst{15-12} = R{3-0};
 }
 class NVGetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
                 dag oops, dag iops, InstrItinClass itin,
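As a rough illustration only (this is not LLVM code, and the helper name is made up), the field placement above can be sketched as plain C bit packing. The opcod placements in bits 27-20, 11-8, 6-5 and bit 4 sit in the unchanged part of the class that this hunk does not show; they are assumed here.

    /* Hypothetical sketch of NVLaneOp's operand layout; not LLVM code. */
    #include <stdint.h>

    uint32_t nv_lane_op(uint8_t opcod1, uint8_t opcod2, uint8_t opcod3,
                        uint8_t p, uint8_t V, uint8_t R) {
      uint32_t inst = 0;
      inst |= (uint32_t)(p & 0xF)      << 28;  /* let Inst{31-28} = p{3-0} */
      inst |= (uint32_t)opcod1         << 20;  /* Inst{27-20} = opcod1     */
      inst |= (uint32_t)(V & 0xF)      << 16;  /* let Inst{19-16} = V{3-0} */
      inst |= (uint32_t)(R & 0xF)      << 12;  /* let Inst{15-12} = R{3-0} */
      inst |= (uint32_t)(opcod2 & 0xF) <<  8;  /* Inst{11-8} = opcod2      */
      inst |= (uint32_t)((V >> 4) & 1) <<  7;  /* let Inst{7} = V{4}       */
      inst |= (uint32_t)(opcod3 & 0x3) <<  5;  /* Inst{6-5} = opcod3       */
      inst |= 1u                       <<  4;  /* Inst{4} = 1              */
      return inst;   /* the new lane field is overlaid by the defs below   */
    }

The new bits<4> lane field is deliberately not placed here: the get_lane/set_lane definitions in the next hunks decide which instruction bits it occupies, depending on the element size.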
@@ -3501,30 +3501,44 @@ def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst),
 
 // VMOV : Vector Get Lane (move scalar to ARM core register)
 
 def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
-                         (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
-                         IIC_VMOVSI, "vmov", "s8", "$dst, $src[$lane]",
-                         [(set GPR:$dst, (NEONvgetlanes (v8i8 DPR:$src),
-                                          imm:$lane))]>;
+                         (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+                         IIC_VMOVSI, "vmov", "s8", "$R, $V[$lane]",
+                         [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V),
+                                        imm:$lane))]> {
+  let Inst{21} = lane{2};
+  let Inst{6-5} = lane{1-0};
+}
 def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
-                          (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
-                          IIC_VMOVSI, "vmov", "s16", "$dst, $src[$lane]",
-                          [(set GPR:$dst, (NEONvgetlanes (v4i16 DPR:$src),
-                                           imm:$lane))]>;
+                          (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+                          IIC_VMOVSI, "vmov", "s16", "$R, $V[$lane]",
+                          [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V),
+                                         imm:$lane))]> {
+  let Inst{21} = lane{1};
+  let Inst{6} = lane{0};
+}
 def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
-                         (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
-                         IIC_VMOVSI, "vmov", "u8", "$dst, $src[$lane]",
-                         [(set GPR:$dst, (NEONvgetlaneu (v8i8 DPR:$src),
-                                          imm:$lane))]>;
+                         (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+                         IIC_VMOVSI, "vmov", "u8", "$R, $V[$lane]",
+                         [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V),
+                                        imm:$lane))]> {
+  let Inst{21} = lane{2};
+  let Inst{6-5} = lane{1-0};
+}
 def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
-                          (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
-                          IIC_VMOVSI, "vmov", "u16", "$dst, $src[$lane]",
-                          [(set GPR:$dst, (NEONvgetlaneu (v4i16 DPR:$src),
-                                           imm:$lane))]>;
+                          (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+                          IIC_VMOVSI, "vmov", "u16", "$R, $V[$lane]",
+                          [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V),
+                                         imm:$lane))]> {
+  let Inst{21} = lane{1};
+  let Inst{6} = lane{0};
+}
 def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
-                          (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane),
-                          IIC_VMOVSI, "vmov", "32", "$dst, $src[$lane]",
-                          [(set GPR:$dst, (extractelt (v2i32 DPR:$src),
-                                           imm:$lane))]>;
+                          (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
+                          IIC_VMOVSI, "vmov", "32", "$R, $V[$lane]",
+                          [(set GPR:$R, (extractelt (v2i32 DPR:$V),
+                                         imm:$lane))]> {
+  let Inst{21} = lane{0};
+}
 // def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
 def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
           (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
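The lane index is split across Inst{21} and Inst{6-5} according to the element size: 8-bit lanes carry a 3-bit index (lane{2} in bit 21, lane{1-0} in bits 6-5), 16-bit lanes a 2-bit index (bit 21 and bit 6), and 32-bit lanes a single bit (bit 21). A self-contained, purely illustrative C check (not LLVM code; the register and condition values are chosen to match the new test added below) reproduces the byte sequence the test expects for "vmov.s8 r0, d16[1]":

    /* Hypothetical check: rebuild the word behind the CHECK bytes
     * [0xb0,0x0b,0x50,0xee] for "vmov.s8 r0, d16[1]".  Not LLVM code. */
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      unsigned lane = 1;                    /* d16[1]                        */
      unsigned V = 16, R = 0, cond = 0xE;   /* V = d16, R = r0, cond = AL    */

      uint32_t inst = 0;
      inst |= (uint32_t)cond      << 28;    /* Inst{31-28} = p               */
      inst |= 0xE5u               << 20;    /* opcod1 {1,1,1,0,0,1,?,1}, ?=0 */
      inst |= (uint32_t)(V & 0xF) << 16;    /* Inst{19-16} = V{3-0}          */
      inst |= (uint32_t)(R & 0xF) << 12;    /* Inst{15-12} = R{3-0}          */
      inst |= 0xBu                <<  8;    /* Inst{11-8}  = opcod2 = 0b1011 */
      inst |= (uint32_t)(V >> 4)  <<  7;    /* Inst{7}     = V{4}            */
      inst |= 1u                  <<  4;    /* Inst{4}     = 1               */
      /* The bits this commit adds for VGETLNs8: */
      inst |= ((lane >> 2) & 1u)  << 21;    /* let Inst{21}  = lane{2}       */
      inst |= (lane & 3u)         <<  5;    /* let Inst{6-5} = lane{1-0}     */

      printf("[0x%02x,0x%02x,0x%02x,0x%02x]\n",   /* least significant byte first */
             (unsigned)(inst & 0xFF), (unsigned)((inst >> 8) & 0xFF),
             (unsigned)((inst >> 16) & 0xFF), (unsigned)(inst >> 24));
      return 0;                             /* prints [0xb0,0x0b,0x50,0xee]  */
    }

The full word is 0xee500bb0; the CHECK strings list its bytes in little-endian order, which is why they read back to front.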
@@ -3560,22 +3574,30 @@ def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
 
 // VMOV : Vector Set Lane (move ARM core register to scalar)
 
-let Constraints = "$src1 = $dst" in {
-def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$dst),
-                         (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
-                         IIC_VMOVISL, "vmov", "8", "$dst[$lane], $src2",
-                         [(set DPR:$dst, (vector_insert (v8i8 DPR:$src1),
-                                          GPR:$src2, imm:$lane))]>;
-def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$dst),
-                          (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
-                          IIC_VMOVISL, "vmov", "16", "$dst[$lane], $src2",
-                          [(set DPR:$dst, (vector_insert (v4i16 DPR:$src1),
-                                           GPR:$src2, imm:$lane))]>;
-def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$dst),
-                          (ins DPR:$src1, GPR:$src2, nohash_imm:$lane),
-                          IIC_VMOVISL, "vmov", "32", "$dst[$lane], $src2",
-                          [(set DPR:$dst, (insertelt (v2i32 DPR:$src1),
-                                           GPR:$src2, imm:$lane))]>;
+let Constraints = "$src1 = $V" in {
+def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
+                         (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
+                         IIC_VMOVISL, "vmov", "8", "$V[$lane], $R",
+                         [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
+                                        GPR:$R, imm:$lane))]> {
+  let Inst{21} = lane{2};
+  let Inst{6-5} = lane{1-0};
+}
+def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
+                          (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
+                          IIC_VMOVISL, "vmov", "16", "$V[$lane], $R",
+                          [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
+                                         GPR:$R, imm:$lane))]> {
+  let Inst{21} = lane{1};
+  let Inst{6} = lane{0};
+}
+def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
+                          (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
+                          IIC_VMOVISL, "vmov", "32", "$V[$lane], $R",
+                          [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
+                                         GPR:$R, imm:$lane))]> {
+  let Inst{21} = lane{0};
+}
 }
 def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
           (v16i8 (INSERT_SUBREG QPR:$src1,
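For context (not part of this commit), these are the instructions that NEON lane intrinsics typically lower to. A minimal C sketch, assuming arm_neon.h and an ARM target with NEON enabled; the registers named in the comments are illustrative only, since register allocation varies:

    #include <arm_neon.h>

    /* Reading a lane moves a vector element into an ARM core register
     * (the "get lane" vmov); writing a lane moves a core register into
     * a vector element (the "set lane" vmov). */
    int32_t read_lane(int8x8_t v) {
      return vget_lane_s8(v, 1);        /* e.g. vmov.s8 r0, d0[1] */
    }

    uint16x4_t write_lane(uint16x4_t v, uint16_t x) {
      return vset_lane_u16(x, v, 2);    /* e.g. vmov.16 d0[2], r1 */
    }

Q-register accesses (vgetq_lane_*, vsetq_lane_*) go through the EXTRACT_SUBREG/INSERT_SUBREG patterns above and therefore still encode a D register, which is why the vgetQ/vsetQ tests below check d16 and d18 operands.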
@@ -304,4 +304,128 @@ declare <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64>) nounwind readnone
 
 declare <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16>) nounwind readnone
 declare <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32>) nounwind readnone
 declare <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64>) nounwind readnone
+
+define i32 @vget_lanes8(<8 x i8>* %A) nounwind {
+  %tmp1 = load <8 x i8>* %A
+; CHECK: vmov.s8 r0, d16[1] @ encoding: [0xb0,0x0b,0x50,0xee]
+  %tmp2 = extractelement <8 x i8> %tmp1, i32 1
+  %tmp3 = sext i8 %tmp2 to i32
+  ret i32 %tmp3
+}
+
+define i32 @vget_lanes16(<4 x i16>* %A) nounwind {
+  %tmp1 = load <4 x i16>* %A
+; CHECK: vmov.s16 r0, d16[1] @ encoding: [0xf0,0x0b,0x10,0xee]
+  %tmp2 = extractelement <4 x i16> %tmp1, i32 1
+  %tmp3 = sext i16 %tmp2 to i32
+  ret i32 %tmp3
+}
+
+define i32 @vget_laneu8(<8 x i8>* %A) nounwind {
+  %tmp1 = load <8 x i8>* %A
+; CHECK: vmov.u8 r0, d16[1] @ encoding: [0xb0,0x0b,0xd0,0xee]
+  %tmp2 = extractelement <8 x i8> %tmp1, i32 1
+  %tmp3 = zext i8 %tmp2 to i32
+  ret i32 %tmp3
+}
+
+define i32 @vget_laneu16(<4 x i16>* %A) nounwind {
+  %tmp1 = load <4 x i16>* %A
+; CHECK: vmov.u16 r0, d16[1] @ encoding: [0xf0,0x0b,0x90,0xee]
+  %tmp2 = extractelement <4 x i16> %tmp1, i32 1
+  %tmp3 = zext i16 %tmp2 to i32
+  ret i32 %tmp3
+}
+
+; Do a vector add to keep the extraction from being done directly from memory.
+define i32 @vget_lanei32(<2 x i32>* %A) nounwind {
+  %tmp1 = load <2 x i32>* %A
+  %tmp2 = add <2 x i32> %tmp1, %tmp1
+; CHECK: vmov.32 r0, d16[1] @ encoding: [0x90,0x0b,0x30,0xee]
+  %tmp3 = extractelement <2 x i32> %tmp2, i32 1
+  ret i32 %tmp3
+}
+
+define i32 @vgetQ_lanes8(<16 x i8>* %A) nounwind {
+  %tmp1 = load <16 x i8>* %A
+; CHECK: vmov.s8 r0, d16[1] @ encoding: [0xb0,0x0b,0x50,0xee]
+  %tmp2 = extractelement <16 x i8> %tmp1, i32 1
+  %tmp3 = sext i8 %tmp2 to i32
+  ret i32 %tmp3
+}
+
+define i32 @vgetQ_lanes16(<8 x i16>* %A) nounwind {
+  %tmp1 = load <8 x i16>* %A
+; CHECK: vmov.s16 r0, d16[1] @ encoding: [0xf0,0x0b,0x10,0xee]
+  %tmp2 = extractelement <8 x i16> %tmp1, i32 1
+  %tmp3 = sext i16 %tmp2 to i32
+  ret i32 %tmp3
+}
+
+define i32 @vgetQ_laneu8(<16 x i8>* %A) nounwind {
+  %tmp1 = load <16 x i8>* %A
+; CHECK: vmov.u8 r0, d16[1] @ encoding: [0xb0,0x0b,0xd0,0xee]
+  %tmp2 = extractelement <16 x i8> %tmp1, i32 1
+  %tmp3 = zext i8 %tmp2 to i32
+  ret i32 %tmp3
+}
+
+define i32 @vgetQ_laneu16(<8 x i16>* %A) nounwind {
+  %tmp1 = load <8 x i16>* %A
+; CHECK: vmov.u16 r0, d16[1] @ encoding: [0xf0,0x0b,0x90,0xee]
+  %tmp2 = extractelement <8 x i16> %tmp1, i32 1
+  %tmp3 = zext i16 %tmp2 to i32
+  ret i32 %tmp3
+}
+
+; Do a vector add to keep the extraction from being done directly from memory.
+define i32 @vgetQ_lanei32(<4 x i32>* %A) nounwind {
+  %tmp1 = load <4 x i32>* %A
+  %tmp2 = add <4 x i32> %tmp1, %tmp1
+; CHECK: vmov.32 r0, d16[1] @ encoding: [0x90,0x0b,0x30,0xee]
+  %tmp3 = extractelement <4 x i32> %tmp2, i32 1
+  ret i32 %tmp3
+}
+
+define <8 x i8> @vset_lane8(<8 x i8>* %A, i8 %B) nounwind {
+  %tmp1 = load <8 x i8>* %A
+; CHECK: vmov.8 d16[1], r1 @ encoding: [0xb0,0x1b,0x40,0xee]
+  %tmp2 = insertelement <8 x i8> %tmp1, i8 %B, i32 1
+  ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vset_lane16(<4 x i16>* %A, i16 %B) nounwind {
+  %tmp1 = load <4 x i16>* %A
+; CHECK: vmov.16 d16[1], r1 @ encoding: [0xf0,0x1b,0x00,0xee]
+  %tmp2 = insertelement <4 x i16> %tmp1, i16 %B, i32 1
+  ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vset_lane32(<2 x i32>* %A, i32 %B) nounwind {
+  %tmp1 = load <2 x i32>* %A
+; CHECK: vmov.32 d16[1], r1 @ encoding: [0x90,0x1b,0x20,0xee]
+  %tmp2 = insertelement <2 x i32> %tmp1, i32 %B, i32 1
+  ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vsetQ_lane8(<16 x i8>* %A, i8 %B) nounwind {
+  %tmp1 = load <16 x i8>* %A
+; CHECK: vmov.8 d18[1], r1 @ encoding: [0xb0,0x1b,0x42,0xee]
+  %tmp2 = insertelement <16 x i8> %tmp1, i8 %B, i32 1
+  ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vsetQ_lane16(<8 x i16>* %A, i16 %B) nounwind {
+  %tmp1 = load <8 x i16>* %A
+; CHECK: vmov.16 d18[1], r1 @ encoding: [0xf0,0x1b,0x02,0xee]
+  %tmp2 = insertelement <8 x i16> %tmp1, i16 %B, i32 1
+  ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vsetQ_lane32(<4 x i32>* %A, i32 %B) nounwind {
+  %tmp1 = load <4 x i32>* %A
+; CHECK: vmov.32 d18[1], r1 @ encoding: [0x90,0x1b,0x22,0xee]
+  %tmp2 = insertelement <4 x i32> %tmp1, i32 %B, i32 1
+  ret <4 x i32> %tmp2
+}