mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-13 04:30:23 +00:00
ARM: Implement big endian bit-conversion for NEON type
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@208538 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
7aa3041087
commit
5c39a97a60
@ -189,6 +189,8 @@ class ARMFastISel final : public FastISel {
|
||||
unsigned ARMSelectCallOp(bool UseReg);
|
||||
unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT);
|
||||
|
||||
const TargetLowering *getTargetLowering() { return TM.getTargetLowering(); }
|
||||
|
||||
// Call handling routines.
|
||||
private:
|
||||
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC,
|
||||
|
@ -3964,8 +3964,14 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
|
||||
|
||||
// Turn f64->i64 into VMOVRRD.
|
||||
if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
|
||||
SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
|
||||
DAG.getVTList(MVT::i32, MVT::i32), Op);
|
||||
SDValue Cvt;
|
||||
if (TLI.isBigEndian() && SrcVT.isVector())
|
||||
Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
|
||||
DAG.getVTList(MVT::i32, MVT::i32),
|
||||
DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
|
||||
else
|
||||
Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
|
||||
DAG.getVTList(MVT::i32, MVT::i32), Op);
|
||||
// Merge the pieces into a single i64 value.
|
||||
return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
|
||||
}
|
||||
|
@ -2366,9 +2366,9 @@ def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
|
||||
def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
|
||||
(VST1q64 addrmode6:$addr, QPR:$value)>;
|
||||
def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
|
||||
(VLD1q32 addrmode6:$addr)>;
|
||||
(VLD1q32 addrmode6:$addr)>, Requires<[IsLE]>;
|
||||
def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
|
||||
(VST1q32 addrmode6:$addr, QPR:$value)>;
|
||||
(VST1q32 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
|
||||
def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
|
||||
(VLD1q16 addrmode6:$addr)>, Requires<[IsLE]>;
|
||||
def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
|
||||
@ -6176,67 +6176,145 @@ def : Pat<(f32 (bitconvert GPR:$a)),
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// bit_convert
|
||||
def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
|
||||
def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
|
||||
def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>;
|
||||
let Predicates = [IsLE] in {
|
||||
def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
|
||||
def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
|
||||
def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>;
|
||||
}
|
||||
def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>;
|
||||
def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
|
||||
def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
|
||||
def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
|
||||
def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>;
|
||||
def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>;
|
||||
let Predicates = [IsLE] in {
|
||||
def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
|
||||
def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
|
||||
def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
|
||||
def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>;
|
||||
def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>;
|
||||
}
|
||||
def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
|
||||
def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
|
||||
def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
|
||||
def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>;
|
||||
def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>;
|
||||
def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
|
||||
def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>;
|
||||
def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>;
|
||||
def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>;
|
||||
def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>;
|
||||
def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;
|
||||
let Predicates = [IsLE] in {
|
||||
def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
|
||||
def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
|
||||
def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>;
|
||||
def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>;
|
||||
def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
|
||||
def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>;
|
||||
def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>;
|
||||
def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>;
|
||||
def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>;
|
||||
def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;
|
||||
}
|
||||
def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>;
|
||||
def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>;
|
||||
def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>;
|
||||
def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>;
|
||||
def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>;
|
||||
def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>;
|
||||
def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
|
||||
let Predicates = [IsLE] in {
|
||||
def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>;
|
||||
def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>;
|
||||
def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>;
|
||||
def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>;
|
||||
def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>;
|
||||
def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
|
||||
}
|
||||
def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
|
||||
def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
|
||||
def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>;
|
||||
let Predicates = [IsLE] in {
|
||||
def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
|
||||
def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>;
|
||||
}
|
||||
|
||||
def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
|
||||
def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
|
||||
def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
|
||||
let Predicates = [IsLE] in {
|
||||
def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
|
||||
def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
|
||||
def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
|
||||
}
|
||||
def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
|
||||
def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
|
||||
def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
|
||||
def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
|
||||
def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
|
||||
def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
|
||||
let Predicates = [IsLE] in {
|
||||
def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
|
||||
def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
|
||||
def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
|
||||
def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
|
||||
def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
|
||||
}
|
||||
def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
|
||||
def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
|
||||
def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
|
||||
def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
|
||||
def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
|
||||
def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
|
||||
def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
|
||||
def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
|
||||
def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
|
||||
def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
|
||||
def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
|
||||
def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
|
||||
let Predicates = [IsLE] in {
|
||||
def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
|
||||
def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
|
||||
def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
|
||||
def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
|
||||
def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
|
||||
def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
|
||||
def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
|
||||
def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
|
||||
def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
|
||||
def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
|
||||
def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
|
||||
}
|
||||
def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
|
||||
def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
|
||||
def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
|
||||
def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
|
||||
let Predicates = [IsLE] in {
|
||||
def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
|
||||
def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
|
||||
def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
|
||||
}
|
||||
def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
|
||||
def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
|
||||
def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
|
||||
def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
|
||||
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
|
||||
let Predicates = [IsLE] in {
|
||||
def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
|
||||
def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
|
||||
def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
|
||||
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
|
||||
}
|
||||
|
||||
let Predicates = [IsBE] in {
|
||||
// 64 bit conversions
|
||||
def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
|
||||
def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
|
||||
def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
|
||||
def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
|
||||
def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
|
||||
def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
|
||||
def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;
|
||||
def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>;
|
||||
def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
|
||||
def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
|
||||
def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>;
|
||||
def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (VREV64d16 DPR:$src)>;
|
||||
def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
|
||||
def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (VREV64d8 DPR:$src)>;
|
||||
def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (VREV32d8 DPR:$src)>;
|
||||
def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (VREV16d8 DPR:$src)>;
|
||||
def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (VREV64d8 DPR:$src)>;
|
||||
def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (VREV32d8 DPR:$src)>;
|
||||
def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
|
||||
def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
|
||||
def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
|
||||
def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
|
||||
def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>;
|
||||
def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
|
||||
def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
|
||||
def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;
|
||||
|
||||
// 128 bit conversions
|
||||
def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
|
||||
def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
|
||||
def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
|
||||
def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
|
||||
def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
|
||||
def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
|
||||
def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
|
||||
def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
|
||||
def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
|
||||
def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
|
||||
def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>;
|
||||
def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
|
||||
def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
|
||||
def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8 QPR:$src)>;
|
||||
def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8 QPR:$src)>;
|
||||
def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8 QPR:$src)>;
|
||||
def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8 QPR:$src)>;
|
||||
def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8 QPR:$src)>;
|
||||
def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
|
||||
def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
|
||||
def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
|
||||
def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
|
||||
def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
|
||||
def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
|
||||
def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
|
||||
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
|
||||
}
|
||||
|
||||
// Fold extracting an element out of a v2i32 into a vfp register.
|
||||
def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
|
||||
|
392
test/CodeGen/ARM/big-endian-neon-bitconv.ll
Normal file
392
test/CodeGen/ARM/big-endian-neon-bitconv.ll
Normal file
@ -0,0 +1,392 @@
|
||||
; RUN: llc < %s -march armeb -mtriple arm-eabi -mattr v7,neon -float-abi soft -o - | FileCheck %s
|
||||
; RUN: llc < %s -march armeb -mtriple arm-eabi -mattr v7,neon -float-abi hard -o - | FileCheck %s -check-prefix CHECK-HARD
|
||||
|
||||
@v2i64 = global <2 x i64> zeroinitializer
|
||||
@v2i32 = global <2 x i32> zeroinitializer
|
||||
@v4i32 = global <4 x i32> zeroinitializer
|
||||
@v4i16 = global <4 x i16> zeroinitializer
|
||||
@v8i16 = global <8 x i16> zeroinitializer
|
||||
@v8i8 = global <8 x i8> zeroinitializer
|
||||
@v16i8 = global <16 x i8> zeroinitializer
|
||||
|
||||
@v2f32 = global <2 x float> zeroinitializer
|
||||
@v2f64 = global <2 x double> zeroinitializer
|
||||
@v4f32 = global <4 x float> zeroinitializer
|
||||
|
||||
|
||||
; 64 bit conversions
|
||||
define void @conv_i64_to_v8i8( i64 %val, <8 x i8>* %store ) {
|
||||
; CHECK-LABEL: conv_i64_to_v8i8:
|
||||
; CHECK: vrev64.8
|
||||
%v = bitcast i64 %val to <8 x i8>
|
||||
%w = load <8 x i8>* @v8i8
|
||||
%a = add <8 x i8> %v, %w
|
||||
store <8 x i8> %a, <8 x i8>* %store
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @conv_v8i8_to_i64( <8 x i8>* %load, <8 x i8>* %store ) {
|
||||
; CHECK-LABEL: conv_v8i8_to_i64:
|
||||
; CHECK: vrev64.8
|
||||
%v = load <8 x i8>* %load
|
||||
%w = load <8 x i8>* @v8i8
|
||||
%a = add <8 x i8> %v, %w
|
||||
%f = bitcast <8 x i8> %a to i64
|
||||
call void @conv_i64_to_v8i8( i64 %f, <8 x i8>* %store )
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @conv_i64_to_v4i16( i64 %val, <4 x i16>* %store ) {
|
||||
; CHECK-LABEL: conv_i64_to_v4i16:
|
||||
; CHECK: vrev64.16
|
||||
%v = bitcast i64 %val to <4 x i16>
|
||||
%w = load <4 x i16>* @v4i16
|
||||
%a = add <4 x i16> %v, %w
|
||||
store <4 x i16> %a, <4 x i16>* %store
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @conv_v4i16_to_i64( <4 x i16>* %load, <4 x i16>* %store ) {
|
||||
; CHECK-LABEL: conv_v4i16_to_i64:
|
||||
; CHECK: vrev64.16
|
||||
%v = load <4 x i16>* %load
|
||||
%w = load <4 x i16>* @v4i16
|
||||
%a = add <4 x i16> %v, %w
|
||||
%f = bitcast <4 x i16> %a to i64
|
||||
call void @conv_i64_to_v4i16( i64 %f, <4 x i16>* %store )
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @conv_i64_to_v2i32( i64 %val, <2 x i32>* %store ) {
|
||||
; CHECK-LABEL: conv_i64_to_v2i32:
|
||||
; CHECK: vrev64.32
|
||||
%v = bitcast i64 %val to <2 x i32>
|
||||
%w = load <2 x i32>* @v2i32
|
||||
%a = add <2 x i32> %v, %w
|
||||
store <2 x i32> %a, <2 x i32>* %store
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @conv_v2i32_to_i64( <2 x i32>* %load, <2 x i32>* %store ) {
|
||||
; CHECK-LABEL: conv_v2i32_to_i64:
|
||||
; CHECK: vrev64.32
|
||||
%v = load <2 x i32>* %load
|
||||
%w = load <2 x i32>* @v2i32
|
||||
%a = add <2 x i32> %v, %w
|
||||
%f = bitcast <2 x i32> %a to i64
|
||||
call void @conv_i64_to_v2i32( i64 %f, <2 x i32>* %store )
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @conv_i64_to_v2f32( i64 %val, <2 x float>* %store ) {
|
||||
; CHECK-LABEL: conv_i64_to_v2f32:
|
||||
; CHECK: vrev64.32
|
||||
%v = bitcast i64 %val to <2 x float>
|
||||
%w = load <2 x float>* @v2f32
|
||||
%a = fadd <2 x float> %v, %w
|
||||
store <2 x float> %a, <2 x float>* %store
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @conv_v2f32_to_i64( <2 x float>* %load, <2 x float>* %store ) {
|
||||
; CHECK-LABEL: conv_v2f32_to_i64:
|
||||
; CHECK: vrev64.32
|
||||
%v = load <2 x float>* %load
|
||||
%w = load <2 x float>* @v2f32
|
||||
%a = fadd <2 x float> %v, %w
|
||||
%f = bitcast <2 x float> %a to i64
|
||||
call void @conv_i64_to_v2f32( i64 %f, <2 x float>* %store )
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @conv_f64_to_v8i8( double %val, <8 x i8>* %store ) {
|
||||
; CHECK-LABEL: conv_f64_to_v8i8:
|
||||
; CHECK: vrev64.8
|
||||
%v = bitcast double %val to <8 x i8>
|
||||
%w = load <8 x i8>* @v8i8
|
||||
%a = add <8 x i8> %v, %w
|
||||
store <8 x i8> %a, <8 x i8>* %store
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @conv_v8i8_to_f64( <8 x i8>* %load, <8 x i8>* %store ) {
|
||||
; CHECK-LABEL: conv_v8i8_to_f64:
|
||||
; CHECK: vrev64.8
|
||||
%v = load <8 x i8>* %load
|
||||
%w = load <8 x i8>* @v8i8
|
||||
%a = add <8 x i8> %v, %w
|
||||
%f = bitcast <8 x i8> %a to double
|
||||
call void @conv_f64_to_v8i8( double %f, <8 x i8>* %store )
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @conv_f64_to_v4i16( double %val, <4 x i16>* %store ) {
|
||||
; CHECK-LABEL: conv_f64_to_v4i16:
|
||||
; CHECK: vrev64.16
|
||||
%v = bitcast double %val to <4 x i16>
|
||||
%w = load <4 x i16>* @v4i16
|
||||
%a = add <4 x i16> %v, %w
|
||||
store <4 x i16> %a, <4 x i16>* %store
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @conv_v4i16_to_f64( <4 x i16>* %load, <4 x i16>* %store ) {
|
||||
; CHECK-LABEL: conv_v4i16_to_f64:
|
||||
; CHECK: vrev64.16
|
||||
%v = load <4 x i16>* %load
|
||||
%w = load <4 x i16>* @v4i16
|
||||
%a = add <4 x i16> %v, %w
|
||||
%f = bitcast <4 x i16> %a to double
|
||||
call void @conv_f64_to_v4i16( double %f, <4 x i16>* %store )
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @conv_f64_to_v2i32( double %val, <2 x i32>* %store ) {
|
||||
; CHECK-LABEL: conv_f64_to_v2i32:
|
||||
; CHECK: vrev64.32
|
||||
%v = bitcast double %val to <2 x i32>
|
||||
%w = load <2 x i32>* @v2i32
|
||||
%a = add <2 x i32> %v, %w
|
||||
store <2 x i32> %a, <2 x i32>* %store
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @conv_v2i32_to_f64( <2 x i32>* %load, <2 x i32>* %store ) {
|
||||
; CHECK-LABEL: conv_v2i32_to_f64:
|
||||
; CHECK: vrev64.32
|
||||
%v = load <2 x i32>* %load
|
||||
%w = load <2 x i32>* @v2i32
|
||||
%a = add <2 x i32> %v, %w
|
||||
%f = bitcast <2 x i32> %a to double
|
||||
call void @conv_f64_to_v2i32( double %f, <2 x i32>* %store )
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @conv_f64_to_v2f32( double %val, <2 x float>* %store ) {
|
||||
; CHECK-LABEL: conv_f64_to_v2f32:
|
||||
; CHECK: vrev64.32
|
||||
%v = bitcast double %val to <2 x float>
|
||||
%w = load <2 x float>* @v2f32
|
||||
%a = fadd <2 x float> %v, %w
|
||||
store <2 x float> %a, <2 x float>* %store
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @conv_v2f32_to_f64( <2 x float>* %load, <2 x float>* %store ) {
|
||||
; CHECK-LABEL: conv_v2f32_to_f64:
|
||||
; CHECK: vrev64.32
|
||||
%v = load <2 x float>* %load
|
||||
%w = load <2 x float>* @v2f32
|
||||
%a = fadd <2 x float> %v, %w
|
||||
%f = bitcast <2 x float> %a to double
|
||||
call void @conv_f64_to_v2f32( double %f, <2 x float>* %store )
|
||||
ret void
|
||||
}
|
||||
|
||||
; 128 bit conversions
|
||||
|
||||
|
||||
define void @conv_i128_to_v16i8( i128 %val, <16 x i8>* %store ) {
|
||||
; CHECK-LABEL: conv_i128_to_v16i8:
|
||||
; CHECK: vrev32.8
|
||||
%v = bitcast i128 %val to <16 x i8>
|
||||
%w = load <16 x i8>* @v16i8
|
||||
%a = add <16 x i8> %v, %w
|
||||
store <16 x i8> %a, <16 x i8>* %store
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @conv_v16i8_to_i128( <16 x i8>* %load, <16 x i8>* %store ) {
|
||||
; CHECK-LABEL: conv_v16i8_to_i128:
|
||||
; CHECK: vrev32.8
|
||||
%v = load <16 x i8>* %load
|
||||
%w = load <16 x i8>* @v16i8
|
||||
%a = add <16 x i8> %v, %w
|
||||
%f = bitcast <16 x i8> %a to i128
|
||||
call void @conv_i128_to_v16i8( i128 %f, <16 x i8>* %store )
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @conv_i128_to_v8i16( i128 %val, <8 x i16>* %store ) {
|
||||
; CHECK-LABEL: conv_i128_to_v8i16:
|
||||
; CHECK: vrev32.16
|
||||
%v = bitcast i128 %val to <8 x i16>
|
||||
%w = load <8 x i16>* @v8i16
|
||||
%a = add <8 x i16> %v, %w
|
||||
store <8 x i16> %a, <8 x i16>* %store
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @conv_v8i16_to_i128( <8 x i16>* %load, <8 x i16>* %store ) {
|
||||
; CHECK-LABEL: conv_v8i16_to_i128:
|
||||
; CHECK: vrev32.16
|
||||
%v = load <8 x i16>* %load
|
||||
%w = load <8 x i16>* @v8i16
|
||||
%a = add <8 x i16> %v, %w
|
||||
%f = bitcast <8 x i16> %a to i128
|
||||
call void @conv_i128_to_v8i16( i128 %f, <8 x i16>* %store )
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @conv_i128_to_v4i32( i128 %val, <4 x i32>* %store ) {
|
||||
; CHECK-LABEL: conv_i128_to_v4i32:
|
||||
; CHECK: vrev64.32
|
||||
%v = bitcast i128 %val to <4 x i32>
|
||||
%w = load <4 x i32>* @v4i32
|
||||
%a = add <4 x i32> %v, %w
|
||||
store <4 x i32> %a, <4 x i32>* %store
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @conv_v4i32_to_i128( <4 x i32>* %load, <4 x i32>* %store ) {
|
||||
; CHECK-LABEL: conv_v4i32_to_i128:
|
||||
; CHECK: vrev64.32
|
||||
%v = load <4 x i32>* %load
|
||||
%w = load <4 x i32>* @v4i32
|
||||
%a = add <4 x i32> %v, %w
|
||||
%f = bitcast <4 x i32> %a to i128
|
||||
call void @conv_i128_to_v4i32( i128 %f, <4 x i32>* %store )
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @conv_i128_to_v4f32( i128 %val, <4 x float>* %store ) {
|
||||
; CHECK-LABEL: conv_i128_to_v4f32:
|
||||
; CHECK: vrev64.32
|
||||
%v = bitcast i128 %val to <4 x float>
|
||||
%w = load <4 x float>* @v4f32
|
||||
%a = fadd <4 x float> %v, %w
|
||||
store <4 x float> %a, <4 x float>* %store
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @conv_v4f32_to_i128( <4 x float>* %load, <4 x float>* %store ) {
|
||||
; CHECK-LABEL: conv_v4f32_to_i128:
|
||||
; CHECK: vrev64.32
|
||||
%v = load <4 x float>* %load
|
||||
%w = load <4 x float>* @v4f32
|
||||
%a = fadd <4 x float> %v, %w
|
||||
%f = bitcast <4 x float> %a to i128
|
||||
call void @conv_i128_to_v4f32( i128 %f, <4 x float>* %store )
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @conv_f128_to_v2f64( fp128 %val, <2 x double>* %store ) {
|
||||
; CHECK-LABEL: conv_f128_to_v2f64:
|
||||
; CHECK: vrev64.32
|
||||
%v = bitcast fp128 %val to <2 x double>
|
||||
%w = load <2 x double>* @v2f64
|
||||
%a = fadd <2 x double> %v, %w
|
||||
store <2 x double> %a, <2 x double>* %store
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @conv_v2f64_to_f128( <2 x double>* %load, <2 x double>* %store ) {
|
||||
; CHECK-LABEL: conv_v2f64_to_f128:
|
||||
; CHECK: vrev64.32
|
||||
%v = load <2 x double>* %load
|
||||
%w = load <2 x double>* @v2f64
|
||||
%a = fadd <2 x double> %v, %w
|
||||
%f = bitcast <2 x double> %a to fp128
|
||||
call void @conv_f128_to_v2f64( fp128 %f, <2 x double>* %store )
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @conv_f128_to_v16i8( fp128 %val, <16 x i8>* %store ) {
|
||||
; CHECK-LABEL: conv_f128_to_v16i8:
|
||||
; CHECK: vrev32.8
|
||||
%v = bitcast fp128 %val to <16 x i8>
|
||||
%w = load <16 x i8>* @v16i8
|
||||
%a = add <16 x i8> %v, %w
|
||||
store <16 x i8> %a, <16 x i8>* %store
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @conv_v16i8_to_f128( <16 x i8>* %load, <16 x i8>* %store ) {
|
||||
; CHECK-LABEL: conv_v16i8_to_f128:
|
||||
; CHECK: vrev32.8
|
||||
%v = load <16 x i8>* %load
|
||||
%w = load <16 x i8>* @v16i8
|
||||
%a = add <16 x i8> %v, %w
|
||||
%f = bitcast <16 x i8> %a to fp128
|
||||
call void @conv_f128_to_v16i8( fp128 %f, <16 x i8>* %store )
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @conv_f128_to_v8i16( fp128 %val, <8 x i16>* %store ) {
|
||||
; CHECK-LABEL: conv_f128_to_v8i16:
|
||||
; CHECK: vrev32.16
|
||||
%v = bitcast fp128 %val to <8 x i16>
|
||||
%w = load <8 x i16>* @v8i16
|
||||
%a = add <8 x i16> %v, %w
|
||||
store <8 x i16> %a, <8 x i16>* %store
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @conv_v8i16_to_f128( <8 x i16>* %load, <8 x i16>* %store ) {
|
||||
; CHECK-LABEL: conv_v8i16_to_f128:
|
||||
; CHECK: vrev32.16
|
||||
%v = load <8 x i16>* %load
|
||||
%w = load <8 x i16>* @v8i16
|
||||
%a = add <8 x i16> %v, %w
|
||||
%f = bitcast <8 x i16> %a to fp128
|
||||
call void @conv_f128_to_v8i16( fp128 %f, <8 x i16>* %store )
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @conv_f128_to_v4f32( fp128 %val, <4 x float>* %store ) {
|
||||
; CHECK-LABEL: conv_f128_to_v4f32:
|
||||
; CHECK: vrev64.32
|
||||
%v = bitcast fp128 %val to <4 x float>
|
||||
%w = load <4 x float>* @v4f32
|
||||
%a = fadd <4 x float> %v, %w
|
||||
store <4 x float> %a, <4 x float>* %store
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @conv_v4f32_to_f128( <4 x float>* %load, <4 x float>* %store ) {
|
||||
; CHECK-LABEL: conv_v4f32_to_f128:
|
||||
; CHECK: vrev64.32
|
||||
%v = load <4 x float>* %load
|
||||
%w = load <4 x float>* @v4f32
|
||||
%a = fadd <4 x float> %v, %w
|
||||
%f = bitcast <4 x float> %a to fp128
|
||||
call void @conv_f128_to_v4f32( fp128 %f, <4 x float>* %store )
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @arg_v4i32( <4 x i32> %var, <4 x i32>* %store ) {
|
||||
; CHECK-LABEL: arg_v4i32:
|
||||
; CHECK: vmov [[REG2:d[0-9]+]], r3, r2
|
||||
; CHECK: vmov [[REG1:d[0-9]+]], r1, r0
|
||||
; CHECK: vst1.64 {[[REG1]], [[REG2]]},
|
||||
; CHECK-HARD-LABEL: arg_v4i32:
|
||||
; CHECK-HARD-NOT: vmov
|
||||
; CHECK-HARD: vst1.64 {d0, d1}
|
||||
store <4 x i32> %var, <4 x i32>* %store
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @arg_v8i16( <8 x i16> %var, <8 x i16>* %store ) {
|
||||
; CHECK-LABEL: arg_v8i16:
|
||||
; CHECK: vmov [[REG2:d[0-9]+]], r3, r2
|
||||
; CHECK: vmov [[REG1:d[0-9]+]], r1, r0
|
||||
; CHECK: vst1.64 {[[REG1]], [[REG2]]},
|
||||
; CHECK-HARD-LABEL: arg_v8i16:
|
||||
; CHECK-HARD-NOT: vmov
|
||||
; CHECK-HARD: vst1.64 {d0, d1}
|
||||
store <8 x i16> %var, <8 x i16>* %store
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @arg_v16i8( <16 x i8> %var, <16 x i8>* %store ) {
|
||||
; CHECK-LABEL: arg_v16i8:
|
||||
; CHECK: vmov [[REG2:d[0-9]+]], r3, r2
|
||||
; CHECK: vmov [[REG1:d[0-9]+]], r1, r0
|
||||
; CHECK: vst1.64 {[[REG1]], [[REG2]]},
|
||||
; CHECK-HARD-LABEL: arg_v16i8:
|
||||
; CHECK-HARD-NOT: vmov
|
||||
; CHECK-HARD: vst1.64 {d0, d1}
|
||||
store <16 x i8> %var, <16 x i8>* %store
|
||||
ret void
|
||||
}
|
||||
|
@ -7,8 +7,8 @@
|
||||
; CHECK-LE-NEXT: vmov {{d[0-9]+}}, r1, r2
|
||||
; CHECK-LE-NEXT: vmov {{d[0-9]+}}, r3, [[REG]]
|
||||
; CHECK-BE-NEXT: vmov {{d[0-9]+}}, r2, r1
|
||||
; CHECK-BE-NEXT: vmov {{d[0-9]+}}, [[REG]], r3
|
||||
; CHECK-NEXT: vst1.8 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0]
|
||||
; CHECK-BE: vmov {{d[0-9]+}}, [[REG]], r3
|
||||
; CHECK: vst1.8 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0]
|
||||
; CHECK-NEXT: bx lr
|
||||
define void @test1(i8* %arg, [4 x i64] %vec.coerce) {
|
||||
bb:
|
||||
|
@ -78,7 +78,7 @@ define <8 x i8> @vget_high8(<16 x i8>* %A) nounwind {
|
||||
; CHECK: vget_high8
|
||||
; CHECK-NOT: vst
|
||||
; CHECK-LE: vmov r0, r1, d17
|
||||
; CHECK-BE: vmov r1, r0, d17
|
||||
; CHECK-BE: vmov r1, r0, d16
|
||||
%tmp1 = load <16 x i8>* %A
|
||||
%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
ret <8 x i8> %tmp2
|
||||
|
Loading…
Reference in New Issue
Block a user