mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-06 09:44:39 +00:00
Implement Neon VST[234] operations.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@78330 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
004f7c7049
commit
b36ec86c01
@ -1306,11 +1306,11 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
|
||||
}
|
||||
|
||||
case ARMISD::VLD2D: {
|
||||
MVT VT = Op.getValueType();
|
||||
SDValue MemAddr, MemUpdate, MemOpc;
|
||||
if (!SelectAddrMode6(Op, N->getOperand(1), MemAddr, MemUpdate, MemOpc))
|
||||
return NULL;
|
||||
unsigned Opc;
|
||||
unsigned Opc = 0;
|
||||
MVT VT = Op.getValueType();
|
||||
switch (VT.getSimpleVT()) {
|
||||
default: llvm_unreachable("unhandled VLD2D type");
|
||||
case MVT::v8i8: Opc = ARM::VLD2d8; break;
|
||||
@ -1323,11 +1323,11 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
|
||||
}
|
||||
|
||||
case ARMISD::VLD3D: {
|
||||
MVT VT = Op.getValueType();
|
||||
SDValue MemAddr, MemUpdate, MemOpc;
|
||||
if (!SelectAddrMode6(Op, N->getOperand(1), MemAddr, MemUpdate, MemOpc))
|
||||
return NULL;
|
||||
unsigned Opc;
|
||||
unsigned Opc = 0;
|
||||
MVT VT = Op.getValueType();
|
||||
switch (VT.getSimpleVT()) {
|
||||
default: llvm_unreachable("unhandled VLD3D type");
|
||||
case MVT::v8i8: Opc = ARM::VLD3d8; break;
|
||||
@ -1340,11 +1340,11 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
|
||||
}
|
||||
|
||||
case ARMISD::VLD4D: {
|
||||
MVT VT = Op.getValueType();
|
||||
SDValue MemAddr, MemUpdate, MemOpc;
|
||||
if (!SelectAddrMode6(Op, N->getOperand(1), MemAddr, MemUpdate, MemOpc))
|
||||
return NULL;
|
||||
unsigned Opc;
|
||||
unsigned Opc = 0;
|
||||
MVT VT = Op.getValueType();
|
||||
switch (VT.getSimpleVT()) {
|
||||
default: llvm_unreachable("unhandled VLD4D type");
|
||||
case MVT::v8i8: Opc = ARM::VLD4d8; break;
|
||||
@ -1357,6 +1357,59 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
|
||||
ResTys.push_back(MVT::Other);
|
||||
return CurDAG->getTargetNode(Opc, dl, ResTys, Ops, 3);
|
||||
}
|
||||
|
||||
case ARMISD::VST2D: {
  // Hand-select a NEON VST2 (store of two D registers).
  // Node operands as used here: 0 = incoming chain (the node is declared
  // with SDNPHasChain), 1 = address, 2..3 = the vectors to store.
  SDValue MemAddr, MemUpdate, MemOpc;
  if (!SelectAddrMode6(Op, N->getOperand(1), MemAddr, MemUpdate, MemOpc))
    return NULL;

  // The element size of the first stored vector picks the instruction;
  // v2f32 shares the 32-bit encoding with v2i32.
  unsigned Opc = 0;
  switch (N->getOperand(2).getValueType().getSimpleVT()) {
  default: llvm_unreachable("unhandled VST2D type");
  case MVT::v8i8:  Opc = ARM::VST2d8; break;
  case MVT::v4i16: Opc = ARM::VST2d16; break;
  case MVT::v2f32:
  case MVT::v2i32: Opc = ARM::VST2d32; break;
  }

  // The machine node produces a chain (MVT::Other), so it must also consume
  // the incoming chain as its last operand.  Without it the store is not
  // ordered against the other memory operations in the DAG.
  SDValue Chain = N->getOperand(0);
  const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
                          N->getOperand(2), N->getOperand(3), Chain };
  return CurDAG->getTargetNode(Opc, dl, MVT::Other, Ops, 6);
}
|
||||
|
||||
case ARMISD::VST3D: {
  // Hand-select a NEON VST3 (store of three D registers).
  // Node operands as used here: 0 = incoming chain (the node is declared
  // with SDNPHasChain), 1 = address, 2..4 = the vectors to store.
  SDValue MemAddr, MemUpdate, MemOpc;
  if (!SelectAddrMode6(Op, N->getOperand(1), MemAddr, MemUpdate, MemOpc))
    return NULL;

  // The element size of the first stored vector picks the instruction;
  // v2f32 shares the 32-bit encoding with v2i32.
  unsigned Opc = 0;
  switch (N->getOperand(2).getValueType().getSimpleVT()) {
  default: llvm_unreachable("unhandled VST3D type");
  case MVT::v8i8:  Opc = ARM::VST3d8; break;
  case MVT::v4i16: Opc = ARM::VST3d16; break;
  case MVT::v2f32:
  case MVT::v2i32: Opc = ARM::VST3d32; break;
  }

  // The machine node produces a chain (MVT::Other), so it must also consume
  // the incoming chain as its last operand.  Without it the store is not
  // ordered against the other memory operations in the DAG.
  SDValue Chain = N->getOperand(0);
  const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
                          N->getOperand(2), N->getOperand(3),
                          N->getOperand(4), Chain };
  return CurDAG->getTargetNode(Opc, dl, MVT::Other, Ops, 7);
}
|
||||
|
||||
case ARMISD::VST4D: {
  // Hand-select a NEON VST4 (store of four D registers).
  // Node operands as used here: 0 = incoming chain (the node is declared
  // with SDNPHasChain), 1 = address, 2..5 = the vectors to store.
  SDValue MemAddr, MemUpdate, MemOpc;
  if (!SelectAddrMode6(Op, N->getOperand(1), MemAddr, MemUpdate, MemOpc))
    return NULL;

  // The element size of the first stored vector picks the instruction;
  // v2f32 shares the 32-bit encoding with v2i32.
  unsigned Opc = 0;
  switch (N->getOperand(2).getValueType().getSimpleVT()) {
  default: llvm_unreachable("unhandled VST4D type");
  case MVT::v8i8:  Opc = ARM::VST4d8; break;
  case MVT::v4i16: Opc = ARM::VST4d16; break;
  case MVT::v2f32:
  case MVT::v2i32: Opc = ARM::VST4d32; break;
  }

  // The machine node produces a chain (MVT::Other), so it must also consume
  // the incoming chain as its last operand.  Without it the store is not
  // ordered against the other memory operations in the DAG.
  SDValue Chain = N->getOperand(0);
  const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc,
                          N->getOperand(2), N->getOperand(3),
                          N->getOperand(4), N->getOperand(5), Chain };
  return CurDAG->getTargetNode(Opc, dl, MVT::Other, Ops, 8);
}
|
||||
}
|
||||
|
||||
return SelectCode(Op);
|
||||
|
@ -323,6 +323,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
|
||||
// We want to custom lower some of our intrinsics.
|
||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
|
||||
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
|
||||
|
||||
setOperationAction(ISD::SETCC, MVT::i32, Expand);
|
||||
setOperationAction(ISD::SETCC, MVT::f32, Expand);
|
||||
@ -466,6 +467,9 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
case ARMISD::VLD2D: return "ARMISD::VLD2D";
|
||||
case ARMISD::VLD3D: return "ARMISD::VLD3D";
|
||||
case ARMISD::VLD4D: return "ARMISD::VLD4D";
|
||||
case ARMISD::VST2D: return "ARMISD::VST2D";
|
||||
case ARMISD::VST3D: return "ARMISD::VST3D";
|
||||
case ARMISD::VST4D: return "ARMISD::VST4D";
|
||||
}
|
||||
}
|
||||
|
||||
@ -1325,6 +1329,23 @@ static SDValue LowerNeonVLDIntrinsic(SDValue Op, SelectionDAG &DAG,
|
||||
return DAG.getNode(Opcode, dl, Node->getVTList(), Ops, 2);
|
||||
}
|
||||
|
||||
/// Rewrite a NEON VSTn intrinsic node as a target-specific store node.
///
/// \param Op       the intrinsic node being lowered.
/// \param DAG      the DAG in which the replacement node is created.
/// \param Opcode   opcode of the node to build (e.g. ARMISD::VST2D).
/// \param NumVecs  how many vector operands, starting at operand 3, to forward.
/// \returns the new node with result type MVT::Other, or an empty SDValue when
///          operand 3 is not a 64-bit vector (that case is unimplemented).
static SDValue LowerNeonVSTIntrinsic(SDValue Op, SelectionDAG &DAG,
                                     unsigned Opcode, unsigned NumVecs) {
  SDNode *Intrin = Op.getNode();

  // Only 64-bit vector stores are handled; anything else is declined.
  if (!Intrin->getOperand(3).getValueType().is64BitVector())
    return SDValue(); // unimplemented

  // New operand list: operand 0, operand 2, then the NumVecs vectors.
  // Operand 1 of the intrinsic node is deliberately not forwarded.
  SmallVector<SDValue, 6> NewOps;
  NewOps.push_back(Intrin->getOperand(0));
  NewOps.push_back(Intrin->getOperand(2));
  for (unsigned Idx = 0; Idx != NumVecs; ++Idx)
    NewOps.push_back(Intrin->getOperand(Idx + 3));

  return DAG.getNode(Opcode, Op.getDebugLoc(), MVT::Other,
                     NewOps.data(), NewOps.size());
}
|
||||
|
||||
SDValue
|
||||
ARMTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) {
|
||||
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
|
||||
@ -1340,10 +1361,13 @@ ARMTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) {
|
||||
return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD4D);
|
||||
case Intrinsic::arm_neon_vst2i:
|
||||
case Intrinsic::arm_neon_vst2f:
|
||||
return LowerNeonVSTIntrinsic(Op, DAG, ARMISD::VST2D, 2);
|
||||
case Intrinsic::arm_neon_vst3i:
|
||||
case Intrinsic::arm_neon_vst3f:
|
||||
return LowerNeonVSTIntrinsic(Op, DAG, ARMISD::VST3D, 3);
|
||||
case Intrinsic::arm_neon_vst4i:
|
||||
case Intrinsic::arm_neon_vst4f:
|
||||
return LowerNeonVSTIntrinsic(Op, DAG, ARMISD::VST4D, 4);
|
||||
default: return SDValue(); // Don't custom lower most intrinsics.
|
||||
}
|
||||
}
|
||||
@ -2381,6 +2405,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
|
||||
case ISD::RETURNADDR: break;
|
||||
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
|
||||
case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
|
||||
case ISD::INTRINSIC_VOID:
|
||||
case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG);
|
||||
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
|
||||
case ISD::BIT_CONVERT: return ExpandBIT_CONVERT(Op.getNode(), DAG);
|
||||
|
@ -119,7 +119,10 @@ namespace llvm {
|
||||
// Vector load/store with (de)interleaving
|
||||
VLD2D,
|
||||
VLD3D,
|
||||
VLD4D
|
||||
VLD4D,
|
||||
VST2D,
|
||||
VST3D,
|
||||
VST4D
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -81,6 +81,20 @@ def NEONvld3d : SDNode<"ARMISD::VLD3D", SDTARMVLD3,
|
||||
def NEONvld4d : SDNode<"ARMISD::VLD4D", SDTARMVLD4,
|
||||
[SDNPHasChain, SDNPMayLoad]>;
|
||||
|
||||
// Type profiles for the interleaving-store nodes.  Each profile declares no
// results; operand 0 is constrained to be a pointer and every following
// operand is constrained to have the same type as operand 1.  The operand
// counts are 3, 4 and 5 for the 2-, 3- and 4-vector forms respectively.
def SDTARMVST2 : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisSameAs<1, 2>]>;
|
||||
def SDTARMVST3 : SDTypeProfile<0, 4, [SDTCisPtrTy<0>, SDTCisSameAs<1, 2>,
|
||||
SDTCisSameAs<1, 3>]>;
|
||||
def SDTARMVST4 : SDTypeProfile<0, 5, [SDTCisPtrTy<0>, SDTCisSameAs<1, 2>,
|
||||
SDTCisSameAs<1, 3>,
|
||||
SDTCisSameAs<1, 4>]>;
|
||||
|
||||
// DAG node definitions bound to the ARMISD::VST{2,3,4}D opcodes.  All three
// are flagged as chained (SDNPHasChain) and as possibly storing to memory
// (SDNPMayStore).
def NEONvst2d : SDNode<"ARMISD::VST2D", SDTARMVST2,
|
||||
[SDNPHasChain, SDNPMayStore]>;
|
||||
def NEONvst3d : SDNode<"ARMISD::VST3D", SDTARMVST3,
|
||||
[SDNPHasChain, SDNPMayStore]>;
|
||||
def NEONvst4d : SDNode<"ARMISD::VST4D", SDTARMVST4,
|
||||
[SDNPHasChain, SDNPMayStore]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// NEON operand definitions
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -172,30 +186,6 @@ def VLD1q32 : VLD1Q<"vld1.32", v4i32, int_arm_neon_vld1i>;
|
||||
def VLD1qf : VLD1Q<"vld1.32", v4f32, int_arm_neon_vld1f>;
|
||||
def VLD1q64 : VLD1Q<"vld1.64", v2i64, int_arm_neon_vld1i>;
|
||||
|
||||
// VST1 : Vector Store (multiple single elements)
|
||||
class VST1D<string OpcodeStr, ValueType Ty, Intrinsic IntOp>
|
||||
: NLdSt<(outs), (ins addrmode6:$addr, DPR:$src),
|
||||
NoItinerary,
|
||||
!strconcat(OpcodeStr, "\t\\{$src\\}, $addr"),
|
||||
[(IntOp addrmode6:$addr, (Ty DPR:$src))]>;
|
||||
class VST1Q<string OpcodeStr, ValueType Ty, Intrinsic IntOp>
|
||||
: NLdSt<(outs), (ins addrmode6:$addr, QPR:$src),
|
||||
NoItinerary,
|
||||
!strconcat(OpcodeStr, "\t${src:dregpair}, $addr"),
|
||||
[(IntOp addrmode6:$addr, (Ty QPR:$src))]>;
|
||||
|
||||
def VST1d8 : VST1D<"vst1.8", v8i8, int_arm_neon_vst1i>;
|
||||
def VST1d16 : VST1D<"vst1.16", v4i16, int_arm_neon_vst1i>;
|
||||
def VST1d32 : VST1D<"vst1.32", v2i32, int_arm_neon_vst1i>;
|
||||
def VST1df : VST1D<"vst1.32", v2f32, int_arm_neon_vst1f>;
|
||||
def VST1d64 : VST1D<"vst1.64", v1i64, int_arm_neon_vst1i>;
|
||||
|
||||
def VST1q8 : VST1Q<"vst1.8", v16i8, int_arm_neon_vst1i>;
|
||||
def VST1q16 : VST1Q<"vst1.16", v8i16, int_arm_neon_vst1i>;
|
||||
def VST1q32 : VST1Q<"vst1.32", v4i32, int_arm_neon_vst1i>;
|
||||
def VST1qf : VST1Q<"vst1.32", v4f32, int_arm_neon_vst1f>;
|
||||
def VST1q64 : VST1Q<"vst1.64", v2i64, int_arm_neon_vst1i>;
|
||||
|
||||
// VLD2 : Vector Load (multiple 2-element structures)
|
||||
class VLD2D<string OpcodeStr>
|
||||
: NLdSt<(outs DPR:$dst1, DPR:$dst2), (ins addrmode6:$addr),
|
||||
@ -227,6 +217,59 @@ def VLD4d8 : VLD4D<"vld4.8">;
|
||||
def VLD4d16 : VLD4D<"vld4.16">;
|
||||
def VLD4d32 : VLD4D<"vld4.32">;
|
||||
|
||||
// VST1 : Vector Store (multiple single elements)
|
||||
class VST1D<string OpcodeStr, ValueType Ty, Intrinsic IntOp>
|
||||
: NLdSt<(outs), (ins addrmode6:$addr, DPR:$src),
|
||||
NoItinerary,
|
||||
!strconcat(OpcodeStr, "\t\\{$src\\}, $addr"),
|
||||
[(IntOp addrmode6:$addr, (Ty DPR:$src))]>;
|
||||
class VST1Q<string OpcodeStr, ValueType Ty, Intrinsic IntOp>
|
||||
: NLdSt<(outs), (ins addrmode6:$addr, QPR:$src),
|
||||
NoItinerary,
|
||||
!strconcat(OpcodeStr, "\t${src:dregpair}, $addr"),
|
||||
[(IntOp addrmode6:$addr, (Ty QPR:$src))]>;
|
||||
|
||||
def VST1d8 : VST1D<"vst1.8", v8i8, int_arm_neon_vst1i>;
|
||||
def VST1d16 : VST1D<"vst1.16", v4i16, int_arm_neon_vst1i>;
|
||||
def VST1d32 : VST1D<"vst1.32", v2i32, int_arm_neon_vst1i>;
|
||||
def VST1df : VST1D<"vst1.32", v2f32, int_arm_neon_vst1f>;
|
||||
def VST1d64 : VST1D<"vst1.64", v1i64, int_arm_neon_vst1i>;
|
||||
|
||||
def VST1q8 : VST1Q<"vst1.8", v16i8, int_arm_neon_vst1i>;
|
||||
def VST1q16 : VST1Q<"vst1.16", v8i16, int_arm_neon_vst1i>;
|
||||
def VST1q32 : VST1Q<"vst1.32", v4i32, int_arm_neon_vst1i>;
|
||||
def VST1qf : VST1Q<"vst1.32", v4f32, int_arm_neon_vst1f>;
|
||||
def VST1q64 : VST1Q<"vst1.64", v2i64, int_arm_neon_vst1i>;
|
||||
|
||||
// VST2 : Vector Store (multiple 2-element structures)
// The class is parameterized only by the mnemonic string.  The instruction
// has no outputs and takes an addrmode6 address plus two D registers, which
// are printed as a braced register list before the address.  The pattern
// list is empty, so no TableGen pattern matches these instructions; the
// VST2d* opcodes are chosen in C++ by ARMDAGToDAGISel::Select for
// ARMISD::VST2D nodes.
|
||||
class VST2D<string OpcodeStr>
|
||||
: NLdSt<(outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2), NoItinerary,
|
||||
!strconcat(OpcodeStr, "\t\\{$src1,$src2\\}, $addr"), []>;
|
||||
|
||||
// One instruction per element size (8, 16 and 32 bits).
def VST2d8 : VST2D<"vst2.8">;
|
||||
def VST2d16 : VST2D<"vst2.16">;
|
||||
def VST2d32 : VST2D<"vst2.32">;
|
||||
|
||||
// VST3 : Vector Store (multiple 3-element structures)
// The class is parameterized only by the mnemonic string.  The instruction
// has no outputs and takes an addrmode6 address plus three D registers, which
// are printed as a braced register list before the address.  The pattern
// list is empty, so no TableGen pattern matches these instructions; the
// VST3d* opcodes are chosen in C++ by ARMDAGToDAGISel::Select for
// ARMISD::VST3D nodes.
|
||||
class VST3D<string OpcodeStr>
|
||||
: NLdSt<(outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3),
|
||||
NoItinerary,
|
||||
!strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3\\}, $addr"), []>;
|
||||
|
||||
// One instruction per element size (8, 16 and 32 bits).
def VST3d8 : VST3D<"vst3.8">;
|
||||
def VST3d16 : VST3D<"vst3.16">;
|
||||
def VST3d32 : VST3D<"vst3.32">;
|
||||
|
||||
// VST4 : Vector Store (multiple 4-element structures)
// The class is parameterized only by the mnemonic string.  The instruction
// has no outputs and takes an addrmode6 address plus four D registers, which
// are printed as a braced register list before the address.  The pattern
// list is empty, so no TableGen pattern matches these instructions; the
// VST4d* opcodes are chosen in C++ by ARMDAGToDAGISel::Select for
// ARMISD::VST4D nodes.
|
||||
class VST4D<string OpcodeStr>
|
||||
: NLdSt<(outs), (ins addrmode6:$addr,
|
||||
DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), NoItinerary,
|
||||
!strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr"), []>;
|
||||
|
||||
// One instruction per element size (8, 16 and 32 bits).
def VST4d8 : VST4D<"vst4.8">;
|
||||
def VST4d16 : VST4D<"vst4.16">;
|
||||
def VST4d32 : VST4D<"vst4.32">;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// NEON pattern fragments
|
||||
|
@ -62,6 +62,27 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd,
|
||||
FirstOpnd = 0;
|
||||
NumRegs = 4;
|
||||
return true;
|
||||
|
||||
case ARM::VST2d8:
|
||||
case ARM::VST2d16:
|
||||
case ARM::VST2d32:
|
||||
FirstOpnd = 3;
|
||||
NumRegs = 2;
|
||||
return true;
|
||||
|
||||
case ARM::VST3d8:
|
||||
case ARM::VST3d16:
|
||||
case ARM::VST3d32:
|
||||
FirstOpnd = 3;
|
||||
NumRegs = 3;
|
||||
return true;
|
||||
|
||||
case ARM::VST4d8:
|
||||
case ARM::VST4d16:
|
||||
case ARM::VST4d32:
|
||||
FirstOpnd = 3;
|
||||
NumRegs = 4;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
|
38
test/CodeGen/ARM/vst2.ll
Normal file
38
test/CodeGen/ARM/vst2.ll
Normal file
@ -0,0 +1,38 @@
|
||||
; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | FileCheck %s

; Each function checks that a vst2 intrinsic call produces the matching
; vst2.<size> instruction.  The intrinsics are declared with an i8* address
; operand, so typed pointers are bitcast to i8* before the call; passing an
; i16*/i32*/float* directly would not match the declared callee type.

define void @vst2i8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst2i8:
;CHECK: vst2.8
  %tmp1 = load <8 x i8>* %B
  call void @llvm.arm.neon.vst2i.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1)
  ret void
}

define void @vst2i16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst2i16:
;CHECK: vst2.16
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <4 x i16>* %B
  call void @llvm.arm.neon.vst2i.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1)
  ret void
}

define void @vst2i32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst2i32:
;CHECK: vst2.32
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  call void @llvm.arm.neon.vst2i.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1)
  ret void
}

define void @vst2f(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst2f:
;CHECK: vst2.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <2 x float>* %B
  call void @llvm.arm.neon.vst2f.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1)
  ret void
}

declare void @llvm.arm.neon.vst2i.v8i8(i8*, <8 x i8>, <8 x i8>) nounwind
declare void @llvm.arm.neon.vst2i.v4i16(i8*, <4 x i16>, <4 x i16>) nounwind
declare void @llvm.arm.neon.vst2i.v2i32(i8*, <2 x i32>, <2 x i32>) nounwind
declare void @llvm.arm.neon.vst2f.v2f32(i8*, <2 x float>, <2 x float>) nounwind
|
38
test/CodeGen/ARM/vst3.ll
Normal file
38
test/CodeGen/ARM/vst3.ll
Normal file
@ -0,0 +1,38 @@
|
||||
; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | FileCheck %s

; Each function checks that a vst3 intrinsic call produces the matching
; vst3.<size> instruction.  The intrinsics are declared with an i8* address
; operand, so typed pointers are bitcast to i8* before the call; passing an
; i16*/i32*/float* directly would not match the declared callee type.

define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst3i8:
;CHECK: vst3.8
  %tmp1 = load <8 x i8>* %B
  call void @llvm.arm.neon.vst3i.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1)
  ret void
}

define void @vst3i16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst3i16:
;CHECK: vst3.16
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <4 x i16>* %B
  call void @llvm.arm.neon.vst3i.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1)
  ret void
}

define void @vst3i32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst3i32:
;CHECK: vst3.32
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  call void @llvm.arm.neon.vst3i.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1)
  ret void
}

define void @vst3f(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst3f:
;CHECK: vst3.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <2 x float>* %B
  call void @llvm.arm.neon.vst3f.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1)
  ret void
}

declare void @llvm.arm.neon.vst3i.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>) nounwind
declare void @llvm.arm.neon.vst3i.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>) nounwind
declare void @llvm.arm.neon.vst3i.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>) nounwind
declare void @llvm.arm.neon.vst3f.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>) nounwind
|
38
test/CodeGen/ARM/vst4.ll
Normal file
38
test/CodeGen/ARM/vst4.ll
Normal file
@ -0,0 +1,38 @@
|
||||
; RUN: llvm-as < %s | llc -march=arm -mattr=+neon | FileCheck %s

; Each function checks that a vst4 intrinsic call produces the matching
; vst4.<size> instruction.  The intrinsics are declared with an i8* address
; operand, so typed pointers are bitcast to i8* before the call; passing an
; i16*/i32*/float* directly would not match the declared callee type.

define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst4i8:
;CHECK: vst4.8
  %tmp1 = load <8 x i8>* %B
  call void @llvm.arm.neon.vst4i.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1)
  ret void
}

define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst4i16:
;CHECK: vst4.16
  %tmp0 = bitcast i16* %A to i8*
  %tmp1 = load <4 x i16>* %B
  call void @llvm.arm.neon.vst4i.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1)
  ret void
}

define void @vst4i32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst4i32:
;CHECK: vst4.32
  %tmp0 = bitcast i32* %A to i8*
  %tmp1 = load <2 x i32>* %B
  call void @llvm.arm.neon.vst4i.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1)
  ret void
}

define void @vst4f(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst4f:
;CHECK: vst4.32
  %tmp0 = bitcast float* %A to i8*
  %tmp1 = load <2 x float>* %B
  call void @llvm.arm.neon.vst4f.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1)
  ret void
}

declare void @llvm.arm.neon.vst4i.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind
declare void @llvm.arm.neon.vst4i.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>) nounwind
declare void @llvm.arm.neon.vst4i.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>) nounwind
declare void @llvm.arm.neon.vst4f.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>) nounwind
|
Loading…
x
Reference in New Issue
Block a user