mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-23 20:29:30 +00:00
This patch adds ABI support for the v1i128 data type.
It adds v1i128 to the appropriate register classes and checks parameter passing and return values. This is related to http://reviews.llvm.org/D9081, which will add instructions that exploit the v1i128 data type. Phabricator review: http://reviews.llvm.org/D9475. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@236503 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
a5f2faff5c
commit
c3c0de39db
@ -62,7 +62,8 @@ def RetCC_PPC : CallingConv<[
|
||||
|
||||
// Vector types returned as "direct" go into V2 .. V9; note that only the
|
||||
// ELFv2 ABI fully utilizes all these registers.
|
||||
CCIfType<[v16i8, v8i16, v4i32, v4f32], CCIfSubtarget<"hasAltivec()",
|
||||
CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32],
|
||||
CCIfSubtarget<"hasAltivec()",
|
||||
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
|
||||
CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
|
||||
CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>>
|
||||
@ -114,7 +115,8 @@ def RetCC_PPC64_ELF_FIS : CallingConv<[
|
||||
CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
|
||||
CCIfType<[v4f64, v4f32, v4i1],
|
||||
CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>,
|
||||
CCIfType<[v16i8, v8i16, v4i32, v4f32], CCIfSubtarget<"hasAltivec()",
|
||||
CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32],
|
||||
CCIfSubtarget<"hasAltivec()",
|
||||
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
|
||||
CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
|
||||
CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>>
|
||||
@ -172,9 +174,9 @@ def CC_PPC32_SVR4 : CallingConv<[
|
||||
CCAssignToReg<[QF1, QF2, QF3, QF4, QF5, QF6, QF7, QF8]>>>,
|
||||
|
||||
// The first 12 Vector arguments are passed in AltiVec registers.
|
||||
CCIfType<[v16i8, v8i16, v4i32, v4f32], CCIfSubtarget<"hasAltivec()",
|
||||
CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9,
|
||||
V10, V11, V12, V13]>>>,
|
||||
CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32],
|
||||
CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2, V3, V4, V5, V6, V7,
|
||||
V8, V9, V10, V11, V12, V13]>>>,
|
||||
CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
|
||||
CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9,
|
||||
VSH10, VSH11, VSH12, VSH13]>>>,
|
||||
|
@ -39,6 +39,7 @@
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Target/TargetOptions.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// FIXME: Remove this once soft-float is supported.
|
||||
@ -402,11 +403,18 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
|
||||
// will selectively turn on ones that can be effectively codegen'd.
|
||||
for (MVT VT : MVT::vector_valuetypes()) {
|
||||
// add/sub are legal for all supported vector VT's.
|
||||
setOperationAction(ISD::ADD , VT, Legal);
|
||||
setOperationAction(ISD::SUB , VT, Legal);
|
||||
|
||||
// This check is temporary until support for quadword add/sub is added
|
||||
if (VT.SimpleTy != MVT::v1i128) {
|
||||
setOperationAction(ISD::ADD , VT, Legal);
|
||||
setOperationAction(ISD::SUB , VT, Legal);
|
||||
}
|
||||
else {
|
||||
setOperationAction(ISD::ADD , VT, Expand);
|
||||
setOperationAction(ISD::SUB , VT, Expand);
|
||||
}
|
||||
|
||||
// Vector instructions introduced in P8
|
||||
if (Subtarget.hasP8Altivec()) {
|
||||
if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
|
||||
setOperationAction(ISD::CTPOP, VT, Legal);
|
||||
setOperationAction(ISD::CTLZ, VT, Legal);
|
||||
}
|
||||
@ -620,8 +628,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
|
||||
addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
|
||||
}
|
||||
|
||||
if (Subtarget.hasP8Altivec())
|
||||
if (Subtarget.hasP8Altivec()) {
|
||||
addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
|
||||
addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
|
||||
}
|
||||
}
|
||||
|
||||
if (Subtarget.hasQPX()) {
|
||||
@ -2473,7 +2483,8 @@ static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
|
||||
// Altivec parameters are padded to a 16 byte boundary.
|
||||
if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
|
||||
ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
|
||||
ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
|
||||
ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
|
||||
ArgVT == MVT::v1i128)
|
||||
Align = 16;
|
||||
// QPX vector types stored in double-precision are padded to a 32 byte
|
||||
// boundary.
|
||||
@ -2552,7 +2563,8 @@ static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
|
||||
}
|
||||
if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
|
||||
ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
|
||||
ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
|
||||
ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
|
||||
ArgVT == MVT::v1i128)
|
||||
if (AvailableVRs > 0) {
|
||||
--AvailableVRs;
|
||||
return false;
|
||||
@ -3131,6 +3143,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
|
||||
case MVT::v16i8:
|
||||
case MVT::v2f64:
|
||||
case MVT::v2i64:
|
||||
case MVT::v1i128:
|
||||
if (!Subtarget.hasQPX()) {
|
||||
// These can be scalar arguments or elements of a vector array type
|
||||
// passed directly. The latter are used to implement ELFv2 homogeneous
|
||||
@ -4605,6 +4618,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
|
||||
case MVT::v16i8:
|
||||
case MVT::v2f64:
|
||||
case MVT::v2i64:
|
||||
case MVT::v1i128:
|
||||
if (++NumVRsUsed <= NumVRs)
|
||||
continue;
|
||||
break;
|
||||
@ -4967,6 +4981,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
|
||||
case MVT::v16i8:
|
||||
case MVT::v2f64:
|
||||
case MVT::v2i64:
|
||||
case MVT::v1i128:
|
||||
if (!Subtarget.hasQPX()) {
|
||||
// These can be scalar arguments or elements of a vector array type
|
||||
// passed directly. The latter are used to implement ELFv2 homogeneous
|
||||
|
@ -817,26 +817,37 @@ def : Pat<(v16i8 (bitconvert (v8i16 VRRC:$src))), (v16i8 VRRC:$src)>;
|
||||
def : Pat<(v16i8 (bitconvert (v4i32 VRRC:$src))), (v16i8 VRRC:$src)>;
|
||||
def : Pat<(v16i8 (bitconvert (v4f32 VRRC:$src))), (v16i8 VRRC:$src)>;
|
||||
def : Pat<(v16i8 (bitconvert (v2i64 VRRC:$src))), (v16i8 VRRC:$src)>;
|
||||
def : Pat<(v16i8 (bitconvert (v1i128 VRRC:$src))), (v16i8 VRRC:$src)>;
|
||||
|
||||
def : Pat<(v8i16 (bitconvert (v16i8 VRRC:$src))), (v8i16 VRRC:$src)>;
|
||||
def : Pat<(v8i16 (bitconvert (v4i32 VRRC:$src))), (v8i16 VRRC:$src)>;
|
||||
def : Pat<(v8i16 (bitconvert (v4f32 VRRC:$src))), (v8i16 VRRC:$src)>;
|
||||
def : Pat<(v8i16 (bitconvert (v2i64 VRRC:$src))), (v8i16 VRRC:$src)>;
|
||||
def : Pat<(v8i16 (bitconvert (v1i128 VRRC:$src))), (v8i16 VRRC:$src)>;
|
||||
|
||||
def : Pat<(v4i32 (bitconvert (v16i8 VRRC:$src))), (v4i32 VRRC:$src)>;
|
||||
def : Pat<(v4i32 (bitconvert (v8i16 VRRC:$src))), (v4i32 VRRC:$src)>;
|
||||
def : Pat<(v4i32 (bitconvert (v4f32 VRRC:$src))), (v4i32 VRRC:$src)>;
|
||||
def : Pat<(v4i32 (bitconvert (v2i64 VRRC:$src))), (v4i32 VRRC:$src)>;
|
||||
def : Pat<(v4i32 (bitconvert (v1i128 VRRC:$src))), (v4i32 VRRC:$src)>;
|
||||
|
||||
def : Pat<(v4f32 (bitconvert (v16i8 VRRC:$src))), (v4f32 VRRC:$src)>;
|
||||
def : Pat<(v4f32 (bitconvert (v8i16 VRRC:$src))), (v4f32 VRRC:$src)>;
|
||||
def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>;
|
||||
def : Pat<(v4f32 (bitconvert (v2i64 VRRC:$src))), (v4f32 VRRC:$src)>;
|
||||
def : Pat<(v4f32 (bitconvert (v1i128 VRRC:$src))), (v4f32 VRRC:$src)>;
|
||||
|
||||
def : Pat<(v2i64 (bitconvert (v16i8 VRRC:$src))), (v2i64 VRRC:$src)>;
|
||||
def : Pat<(v2i64 (bitconvert (v8i16 VRRC:$src))), (v2i64 VRRC:$src)>;
|
||||
def : Pat<(v2i64 (bitconvert (v4i32 VRRC:$src))), (v2i64 VRRC:$src)>;
|
||||
def : Pat<(v2i64 (bitconvert (v4f32 VRRC:$src))), (v2i64 VRRC:$src)>;
|
||||
def : Pat<(v2i64 (bitconvert (v1i128 VRRC:$src))), (v2i64 VRRC:$src)>;
|
||||
|
||||
def : Pat<(v1i128 (bitconvert (v16i8 VRRC:$src))), (v1i128 VRRC:$src)>;
|
||||
def : Pat<(v1i128 (bitconvert (v8i16 VRRC:$src))), (v1i128 VRRC:$src)>;
|
||||
def : Pat<(v1i128 (bitconvert (v4i32 VRRC:$src))), (v1i128 VRRC:$src)>;
|
||||
def : Pat<(v1i128 (bitconvert (v4f32 VRRC:$src))), (v1i128 VRRC:$src)>;
|
||||
def : Pat<(v1i128 (bitconvert (v2i64 VRRC:$src))), (v1i128 VRRC:$src)>;
|
||||
|
||||
// Shuffles.
|
||||
|
||||
|
@ -890,6 +890,11 @@ def : Pat<(v2f64 (bitconvert v2i64:$A)),
|
||||
def : Pat<(v2i64 (bitconvert v2f64:$A)),
|
||||
(COPY_TO_REGCLASS $A, VRRC)>;
|
||||
|
||||
def : Pat<(v2f64 (bitconvert v1i128:$A)),
|
||||
(COPY_TO_REGCLASS $A, VRRC)>;
|
||||
def : Pat<(v1i128 (bitconvert v2f64:$A)),
|
||||
(COPY_TO_REGCLASS $A, VRRC)>;
|
||||
|
||||
// sign extension patterns
|
||||
// To extend "in place" from v2i32 to v2i64, we have input data like:
|
||||
// | undef | i32 | undef | i32 |
|
||||
|
@ -288,7 +288,7 @@ def F8RC : RegisterClass<"PPC", [f64], 64, (add (sequence "F%u", 0, 13),
|
||||
(sequence "F%u", 31, 14))>;
|
||||
def F4RC : RegisterClass<"PPC", [f32], 32, (add F8RC)>;
|
||||
|
||||
def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v2i64,v4f32], 128,
|
||||
def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v2i64,v1i128,v4f32], 128,
|
||||
(add V2, V3, V4, V5, V0, V1, V6, V7, V8, V9, V10, V11,
|
||||
V12, V13, V14, V15, V16, V17, V18, V19, V31, V30,
|
||||
V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>;
|
||||
|
284
test/CodeGen/PowerPC/ppc64-i128-abi.ll
Normal file
284
test/CodeGen/PowerPC/ppc64-i128-abi.ll
Normal file
@ -0,0 +1,284 @@
|
||||
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-LE
|
||||
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-BE
|
||||
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX
|
||||
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX
|
||||
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-BE-NOVSX
|
||||
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-LE-NOVSX
|
||||
|
||||
@x = common global <1 x i128> zeroinitializer, align 16
|
||||
@y = common global <1 x i128> zeroinitializer, align 16
|
||||
@a = common global i128 zeroinitializer, align 16
|
||||
@b = common global i128 zeroinitializer, align 16
|
||||
|
||||
; VSX:
|
||||
; %a is passed in register 34
|
||||
; On LE, ensure %a is swapped before being used (using xxswapd)
|
||||
; Similarly, on LE ensure the results are swapped before being returned in
|
||||
; register 34
|
||||
; VMX (no VSX):
|
||||
; %a is passed in register 2
|
||||
; No swaps are necessary on LE
|
||||
define <1 x i128> @v1i128_increment_by_one(<1 x i128> %a) nounwind {
|
||||
%tmp = add <1 x i128> %a, <i128 1>
|
||||
ret <1 x i128> %tmp
|
||||
|
||||
; CHECK-LE-LABEL: @v1i128_increment_by_one
|
||||
; CHECK-LE: xxswapd [[PARAM1:[0-9]+]], 34
|
||||
; CHECK-LE: stxvd2x [[PARAM1]], {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK-LE: lxvd2x [[RESULT:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK-LE: xxswapd 34, [[RESULT]]
|
||||
; CHECK-LE: blr
|
||||
|
||||
; CHECK-BE-LABEL: @v1i128_increment_by_one
|
||||
; CHECK-BE-NOT: xxswapd {{[0-9]+}}, 34
|
||||
; CHECK-BE: stxvd2x 34, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK-BE: lxvd2x 34, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK-BE-NOT: xxswapd 34, {{[0-9]+}}
|
||||
; CHECK-BE: blr
|
||||
|
||||
; CHECK-NOVSX-LABEL: @v1i128_increment_by_one
|
||||
; CHECK-NOVSX-NOT: xxswapd {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK-NOVSX-NOT: stxvd2x {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK-NOVSX: stvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK-NOVSX: lvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK-NOVSX-NOT: lxvd2x {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK-NOVSX-NOT: xxswapd {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK-NOVSX: blr
|
||||
}
|
||||
|
||||
; VSX:
|
||||
; %a is passed in register 34
|
||||
; %b is passed in register 35
|
||||
; On LE, ensure the contents of 34 and 35 are swapped before being used
|
||||
; Similarly, on LE ensure the results are swapped before being returned in
|
||||
; register 34
|
||||
; VMX (no VSX):
|
||||
; %a is passed in register 2
|
||||
; %b is passed in register 3
|
||||
; On LE, do not need to swap contents of 2 and 3 because the lvx/stvx
|
||||
; instructions do not swap elements
|
||||
define <1 x i128> @v1i128_increment_by_val(<1 x i128> %a, <1 x i128> %b) nounwind {
|
||||
%tmp = add <1 x i128> %a, %b
|
||||
ret <1 x i128> %tmp
|
||||
|
||||
; CHECK-LE-LABEL: @v1i128_increment_by_val
|
||||
; CHECK-LE-DAG: xxswapd [[PARAM1:[0-9]+]], 34
|
||||
; CHECK-LE-DAG: xxswapd [[PARAM2:[0-9]+]], 35
|
||||
; CHECK-LE-DAG: stxvd2x [[PARAM1]], {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK-LE-DAG: stxvd2x [[PARAM2]], {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK-LE: lxvd2x [[RESULT:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK-LE: xxswapd 34, [[RESULT]]
|
||||
; CHECK-LE: blr
|
||||
|
||||
; CHECK-BE-LABEL: @v1i128_increment_by_val
|
||||
; CHECK-BE-NOT: xxswapd {{[0-9]+}}, 34
|
||||
; CHECK-BE-NOT: xxswapd {{[0-9]+}}, 35
|
||||
; CHECK-BE-DAG: stxvd2x 34, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK-BE-DAG: stxvd2x 35, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK-BE: lxvd2x [[RESULT:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK-BE-NOT: xxswapd 34, [[RESULT]]
|
||||
; CHECK-BE: blr
|
||||
|
||||
; CHECK-NOVSX-LABEL: @v1i128_increment_by_val
|
||||
; CHECK-NOVSX-NOT: xxswapd {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK-NOVSX-NOT: xxswapd {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK-NOVSX-DAG: stvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK-NOVSX-DAG: stvx 3, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK-NOVSX: lvx [[RESULT:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK-NOVSX-NOT: xxswapd 34, [[RESULT]]
|
||||
; CHECK-NOVSX: blr
|
||||
}
|
||||
|
||||
; Little Endian (VSX and VMX):
|
||||
; Lower 64-bits of %a are passed in register 3
|
||||
; Upper 64-bits of %a are passed in register 4
|
||||
; Increment lower 64-bits using addic (immediate value of 1)
|
||||
; Add the carry into the upper 64-bits using addze (add to zero extended)
|
||||
; Results are placed in registers 3 and 4
|
||||
; Big Endian (VSX and VMX)
|
||||
; Lower 64-bits of %a are passed in register 4
|
||||
; Upper 64-bits of %a are passed in register 3
|
||||
; Increment lower 64-bits using addic (immediate value of 1)
|
||||
; Add the carry into the upper 64-bits using addze (add to zero extended)
|
||||
; Results are placed in registers 3 and 4
|
||||
define i128 @i128_increment_by_one(i128 %a) nounwind {
|
||||
%tmp = add i128 %a, 1
|
||||
ret i128 %tmp
|
||||
; CHECK-LE-LABEL: @i128_increment_by_one
|
||||
; CHECK-LE: addic 3, 3, 1
|
||||
; CHECK-LE-NEXT: addze 4, 4
|
||||
; CHECK-LE: blr
|
||||
|
||||
; CHECK-BE-LABEL: @i128_increment_by_one
|
||||
; CHECK-BE: addic 4, 4, 1
|
||||
; CHECK-BE-NEXT: addze 3, 3
|
||||
; CHECK-BE: blr
|
||||
|
||||
; CHECK-LE-NOVSX-LABEL: @i128_increment_by_one
|
||||
; CHECK-LE-NOVSX: addic 3, 3, 1
|
||||
; CHECK-LE-NOVSX-NEXT: addze 4, 4
|
||||
; CHECK-LE-NOVSX: blr
|
||||
|
||||
; CHECK-BE-NOVSX-LABEL: @i128_increment_by_one
|
||||
; CHECK-BE-NOVSX: addic 4, 4, 1
|
||||
; CHECK-BE-NOVSX-NEXT: addze 3, 3
|
||||
; CHECK-BE-NOVSX: blr
|
||||
}
|
||||
|
||||
; Little Endian (VSX and VMX):
|
||||
; Lower 64-bits of %a are passed in register 3
|
||||
; Upper 64-bits of %a are passed in register 4
|
||||
; Lower 64-bits of %b are passed in register 5
|
||||
; Upper 64-bits of %b are passed in register 6
|
||||
; Add the lower 64-bits using addc on registers 3 and 5
|
||||
; Add the upper 64-bits using adde on registers 4 and 6
|
||||
; Registers 3 and 4 should hold the result
|
||||
; Big Endian (VSX and VMX):
|
||||
; Upper 64-bits of %a are passed in register 3
|
||||
; Lower 64-bits of %a are passed in register 4
|
||||
; Upper 64-bits of %b are passed in register 5
|
||||
; Lower 64-bits of %b are passed in register 6
|
||||
; Add the lower 64-bits using addc on registers 4 and 6
|
||||
; Add the upper 64-bits using adde on registers 3 and 5
|
||||
; Registers 3 and 4 should hold the result
|
||||
define i128 @i128_increment_by_val(i128 %a, i128 %b) nounwind {
|
||||
%tmp = add i128 %a, %b
|
||||
ret i128 %tmp
|
||||
; CHECK-LE-LABEL: @i128_increment_by_val
|
||||
; CHECK-LE: addc 3, 3, 5
|
||||
; CHECK-LE-NEXT: adde 4, 4, 6
|
||||
; CHECK-LE: blr
|
||||
|
||||
; CHECK-BE-LABEL: @i128_increment_by_val
|
||||
; CHECK-BE: addc 4, 4, 6
|
||||
; CHECK-BE-NEXT: adde 3, 3, 5
|
||||
; CHECK-BE: blr
|
||||
|
||||
; CHECK-LE-NOVSX-LABEL: @i128_increment_by_val
|
||||
; CHECK-LE-NOVSX: addc 3, 3, 5
|
||||
; CHECK-LE-NOVSX-NEXT: adde 4, 4, 6
|
||||
; CHECK-LE-NOVSX: blr
|
||||
|
||||
; CHECK-BE-NOVSX-LABEL: @i128_increment_by_val
|
||||
; CHECK-BE-NOVSX: addc 4, 4, 6
|
||||
; CHECK-BE-NOVSX-NEXT: adde 3, 3, 5
|
||||
; CHECK-BE-NOVSX: blr
|
||||
}
|
||||
|
||||
|
||||
; Callsites for the routines defined above.
|
||||
; Ensure the parameters are loaded in the same order that is expected by the
|
||||
; callee. See comments for individual functions above for details on registers
|
||||
; used for parameters.
|
||||
define <1 x i128> @call_v1i128_increment_by_one() nounwind {
|
||||
%tmp = load <1 x i128>, <1 x i128>* @x, align 16
|
||||
%ret = call <1 x i128> @v1i128_increment_by_one(<1 x i128> %tmp)
|
||||
ret <1 x i128> %ret
|
||||
|
||||
; CHECK-LE-LABEL: @call_v1i128_increment_by_one
|
||||
; CHECK-LE: lxvd2x [[PARAM:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK-LE: xxswapd 34, [[PARAM]]
|
||||
; CHECK-LE: bl v1i128_increment_by_one
|
||||
; CHECK-LE: blr
|
||||
|
||||
; CHECK-BE-LABEL: @call_v1i128_increment_by_one
|
||||
; CHECK-BE: lxvw4x 34, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK-BE-NOT: xxswapd 34, {{[0-9]+}}
|
||||
; CHECK-BE: bl v1i128_increment_by_one
|
||||
; CHECK-BE: blr
|
||||
|
||||
; CHECK-NOVSX-LABEL: @call_v1i128_increment_by_one
|
||||
; CHECK-NOVSX: lvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK-NOVSX-NOT: xxswapd {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK-NOVSX: bl v1i128_increment_by_one
|
||||
; CHECK-NOVSX: blr
|
||||
}
|
||||
|
||||
define <1 x i128> @call_v1i128_increment_by_val() nounwind {
|
||||
%tmp = load <1 x i128>, <1 x i128>* @x, align 16
|
||||
%tmp2 = load <1 x i128>, <1 x i128>* @y, align 16
|
||||
%ret = call <1 x i128> @v1i128_increment_by_val(<1 x i128> %tmp, <1 x i128> %tmp2)
|
||||
ret <1 x i128> %ret
|
||||
|
||||
; CHECK-LE-LABEL: @call_v1i128_increment_by_val
|
||||
; CHECK-LE: lxvd2x [[PARAM1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK-LE: lxvd2x [[PARAM2:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK-LE-DAG: xxswapd 34, [[PARAM1]]
|
||||
; CHECK-LE-DAG: xxswapd 35, [[PARAM2]]
|
||||
; CHECK-LE: bl v1i128_increment_by_val
|
||||
; CHECK-LE: blr
|
||||
|
||||
; CHECK-BE-LABEL: @call_v1i128_increment_by_val
|
||||
|
||||
|
||||
; CHECK-BE-DAG: lxvw4x 35, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK-BE-NOT: xxswapd 34, {{[0-9]+}}
|
||||
; CHECK-BE-NOT: xxswapd 35, {{[0-9]+}}
|
||||
; CHECK-BE: bl v1i128_increment_by_val
|
||||
; CHECK-BE: blr
|
||||
|
||||
; CHECK-NOVSX-LABEL: @call_v1i128_increment_by_val
|
||||
; CHECK-NOVSX-DAG: lvx 2, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK-NOVSX-DAG: lvx 3, {{[0-9]+}}, {{[0-9]+}}
|
||||
; CHECK-NOVSX-NOT: xxswapd 34, {{[0-9]+}}
|
||||
; CHECK-NOVSX-NOT: xxswapd 35, {{[0-9]+}}
|
||||
; CHECK-NOVSX: bl v1i128_increment_by_val
|
||||
; CHECK-NOVSX: blr
|
||||
|
||||
}
|
||||
|
||||
define i128 @call_i128_increment_by_one() nounwind {
|
||||
%tmp = load i128, i128* @a, align 16
|
||||
%ret = call i128 @i128_increment_by_one(i128 %tmp)
|
||||
ret i128 %ret
|
||||
; %ret4 = call i128 @i128_increment_by_val(i128 %tmp2, i128 %tmp2)
|
||||
; CHECK-LE-LABEL: @call_i128_increment_by_one
|
||||
; CHECK-LE-DAG: ld 3, 0([[BASEREG:[0-9]+]])
|
||||
; CHECK-LE-DAG: ld 4, 8([[BASEREG]])
|
||||
; CHECK-LE: bl i128_increment_by_one
|
||||
; CHECK-LE: blr
|
||||
|
||||
; CHECK-BE-LABEL: @call_i128_increment_by_one
|
||||
; CHECK-BE-DAG: ld 3, 0([[BASEREG:[0-9]+]])
|
||||
; CHECK-BE-DAG: ld 4, 8([[BASEREG]])
|
||||
; CHECK-BE: bl i128_increment_by_one
|
||||
; CHECK-BE: blr
|
||||
|
||||
; CHECK-NOVSX-LABEL: @call_i128_increment_by_one
|
||||
; CHECK-NOVSX-DAG: ld 3, 0([[BASEREG:[0-9]+]])
|
||||
; CHECK-NOVSX-DAG: ld 4, 8([[BASEREG]])
|
||||
; CHECK-NOVSX: bl i128_increment_by_one
|
||||
; CHECK-NOVSX: blr
|
||||
}
|
||||
|
||||
define i128 @call_i128_increment_by_val() nounwind {
|
||||
%tmp = load i128, i128* @a, align 16
|
||||
%tmp2 = load i128, i128* @b, align 16
|
||||
%ret = call i128 @i128_increment_by_val(i128 %tmp, i128 %tmp2)
|
||||
ret i128 %ret
|
||||
; CHECK-LE-LABEL: @call_i128_increment_by_val
|
||||
; CHECK-LE-DAG: ld 3, 0([[P1BASEREG:[0-9]+]])
|
||||
; CHECK-LE-DAG: ld 4, 8([[P1BASEREG]])
|
||||
; CHECK-LE-DAG: ld 5, 0([[P2BASEREG:[0-9]+]])
|
||||
; CHECK-LE-DAG: ld 6, 8([[P2BASEREG]])
|
||||
; CHECK-LE: bl i128_increment_by_val
|
||||
; CHECK-LE: blr
|
||||
|
||||
; CHECK-BE-LABEL: @call_i128_increment_by_val
|
||||
; CHECK-BE-DAG: ld 3, 0([[P1BASEREG:[0-9]+]])
|
||||
; CHECK-BE-DAG: ld 4, 8([[P1BASEREG]])
|
||||
; CHECK-BE-DAG: ld 5, 0([[P2BASEREG:[0-9]+]])
|
||||
; CHECK-BE-DAG: ld 6, 8([[P2BASEREG]])
|
||||
; CHECK-BE: bl i128_increment_by_val
|
||||
; CHECK-BE: blr
|
||||
|
||||
; CHECK-NOVSX-LABEL: @call_i128_increment_by_val
|
||||
; CHECK-NOVSX-DAG: ld 3, 0([[P1BASEREG:[0-9]+]])
|
||||
; CHECK-NOVSX-DAG: ld 4, 8([[P1BASEREG]])
|
||||
; CHECK-NOVSX-DAG: ld 5, 0([[P2BASEREG:[0-9]+]])
|
||||
; CHECK-NOVSX-DAG: ld 6, 8([[P2BASEREG]])
|
||||
; CHECK-NOVSX: bl i128_increment_by_val
|
||||
; CHECK-NOVSX: blr
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user