From 1410a2c9069ef174934e245b86e776dc9720824e Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Tue, 3 Jun 2014 13:54:53 +0000 Subject: [PATCH] AArch64: mark small types (i1, i8, i16) as promoted This means the output of LowerFormalArguments returns a lowered SDValue with the correct type (expected in SelectionDAGBuilder). Without this, an assertion under a DEBUG macro triggers when those types are passed on the stack. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@210102 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../AArch64/AArch64CallingConvention.td | 14 ++++------ lib/Target/AArch64/AArch64FastISel.cpp | 4 +-- lib/Target/AArch64/AArch64ISelLowering.cpp | 16 +++++------ test/CodeGen/AArch64/arm64-abi.ll | 27 ++++++++++--------- 4 files changed, 27 insertions(+), 34 deletions(-) diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td index ded2e17c544..8e8bd3d0bcd 100644 --- a/lib/Target/AArch64/AArch64CallingConvention.td +++ b/lib/Target/AArch64/AArch64CallingConvention.td @@ -18,9 +18,6 @@ class CCIfAlign : class CCIfBigEndian : CCIf<"State.getTarget().getDataLayout()->isBigEndian()", A>; -class CCIfUnallocated : - CCIf<"!State.isAllocated(AArch64::" # Reg # ")", A>; - //===----------------------------------------------------------------------===// // ARM AAPCS64 Calling Convention //===----------------------------------------------------------------------===// @@ -45,7 +42,7 @@ def CC_AArch64_AAPCS : CallingConv<[ // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, // up to eight each of GPR and FPR. - CCIfType<[i1, i8, i16], CCIfUnallocated<"X7", CCPromoteToType>>, + CCIfType<[i1, i8, i16], CCPromoteToType>, CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], [X0, X1, X2, X3, X4, X5, X6, X7]>>, // i128 is split to two i64s, we can't fit half to register X7. @@ -120,7 +117,7 @@ def CC_AArch64_DarwinPCS : CallingConv<[ // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, // up to eight each of GPR and FPR. - CCIfType<[i1, i8, i16], CCIfUnallocated<"X7", CCPromoteToType>>, + CCIfType<[i1, i8, i16], CCPromoteToType>, CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], [X0, X1, X2, X3, X4, X5, X6, X7]>>, // i128 is split to two i64s, we can't fit half to register X7. @@ -143,8 +140,8 @@ def CC_AArch64_DarwinPCS : CallingConv<[ CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, // If more than will fit in registers, pass them on the stack instead. - CCIfType<[i1, i8], CCAssignToStack<1, 1>>, - CCIfType<[i16], CCAssignToStack<2, 2>>, + CCIf<"ValVT == MVT::i1 || ValVT == MVT::i8", CCAssignToStack<1, 1>>, + CCIf<"ValVT == MVT::i16", CCAssignToStack<2, 2>>, CCIfType<[i32, f32], CCAssignToStack<4, 4>>, CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8], CCAssignToStack<8, 8>>, @@ -172,12 +169,11 @@ def CC_AArch64_DarwinPCS_VarArg : CallingConv<[ // 32bit quantity as undef. def CC_AArch64_WebKit_JS : CallingConv<[ // Handle i1, i8, i16, i32, and i64 passing in register X0 (W0). - CCIfType<[i1, i8, i16], CCIfUnallocated<"X0", CCPromoteToType>>, + CCIfType<[i1, i8, i16], CCPromoteToType>, CCIfType<[i32], CCAssignToRegWithShadow<[W0], [X0]>>, CCIfType<[i64], CCAssignToRegWithShadow<[X0], [W0]>>, // Pass the remaining arguments on the stack instead. - CCIfType<[i1, i8, i16], CCAssignToStack<4, 4>>, CCIfType<[i32, f32], CCAssignToStack<4, 4>>, CCIfType<[i64, f64], CCAssignToStack<8, 8>> ]>; diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index f97cfb943d7..c84889e1f9d 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -1218,7 +1218,6 @@ bool AArch64FastISel::ProcessCallArgs( Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ false); if (Arg == 0) return false; - ArgVT = DestVT; break; } case CCValAssign::AExt: @@ -1229,7 +1228,6 @@ bool AArch64FastISel::ProcessCallArgs( Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ true); if (Arg == 0) return false; - ArgVT = DestVT; break; } default: @@ -1248,7 +1246,7 @@ bool AArch64FastISel::ProcessCallArgs( assert(VA.isMemLoc() && "Assuming store on stack."); // Need to store on the stack. - unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8; + unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8; unsigned BEAlign = 0; if (ArgSize < 8 && !Subtarget->isLittleEndian()) diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 4bc22c338e5..4ce0362f4ff 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1781,21 +1781,21 @@ SDValue AArch64TargetLowering::LowerFormalArguments( switch (VA.getLocInfo()) { default: break; + case CCValAssign::BCvt: + MemVT = VA.getLocVT(); + break; case CCValAssign::SExt: ExtType = ISD::SEXTLOAD; - MemVT = VA.getLocVT(); break; case CCValAssign::ZExt: ExtType = ISD::ZEXTLOAD; - MemVT = VA.getLocVT(); break; case CCValAssign::AExt: ExtType = ISD::EXTLOAD; - MemVT = VA.getLocVT(); break; } - ArgValue = DAG.getExtLoad(ExtType, DL, VA.getValVT(), Chain, FIN, + ArgValue = DAG.getExtLoad(ExtType, DL, VA.getLocVT(), Chain, FIN, MachinePointerInfo::getFixedStack(FI), MemVT, false, false, false, 0); @@ -2346,11 +2346,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already // promoted to a legal register type i32, we should truncate Arg back to // i1/i8/i16. - if (Arg.getValueType().isSimple() && - Arg.getValueType().getSimpleVT() == MVT::i32 && - (VA.getLocVT() == MVT::i1 || VA.getLocVT() == MVT::i8 || - VA.getLocVT() == MVT::i16)) - Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getLocVT(), Arg); + if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 || + VA.getValVT() == MVT::i16) + Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg); SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo, false, false, 0); diff --git a/test/CodeGen/AArch64/arm64-abi.ll b/test/CodeGen/AArch64/arm64-abi.ll index e2de434c7b0..a955029b372 100644 --- a/test/CodeGen/AArch64/arm64-abi.ll +++ b/test/CodeGen/AArch64/arm64-abi.ll @@ -1,5 +1,6 @@ -; RUN: llc < %s -march=arm64 -mcpu=cyclone -enable-misched=false | FileCheck %s +; RUN: llc < %s -debug -march=arm64 -mcpu=cyclone -enable-misched=false | FileCheck %s ; RUN: llc < %s -O0 | FileCheck -check-prefix=FAST %s +; REQUIRES: asserts target triple = "arm64-apple-darwin" ; rdar://9932559 @@ -8,15 +9,15 @@ entry: ; CHECK-LABEL: i8i16callee: ; The 8th, 9th, 10th and 11th arguments are passed at sp, sp+2, sp+4, sp+5. ; They are i8, i16, i8 and i8. -; CHECK: ldrsb {{w[0-9]+}}, [sp, #5] -; CHECK: ldrsh {{w[0-9]+}}, [sp, #2] -; CHECK: ldrsb {{w[0-9]+}}, [sp] -; CHECK: ldrsb {{w[0-9]+}}, [sp, #4] +; CHECK-DAG: ldrsb {{w[0-9]+}}, [sp, #5] +; CHECK-DAG: ldrsb {{w[0-9]+}}, [sp, #4] +; CHECK-DAG: ldrsh {{w[0-9]+}}, [sp, #2] +; CHECK-DAG: ldrsb {{w[0-9]+}}, [sp] ; FAST-LABEL: i8i16callee: -; FAST: ldrb {{w[0-9]+}}, [sp, #5] -; FAST: ldrb {{w[0-9]+}}, [sp, #4] -; FAST: ldrh {{w[0-9]+}}, [sp, #2] -; FAST: ldrb {{w[0-9]+}}, [sp] +; FAST-DAG: ldrsb {{w[0-9]+}}, [sp, #5] +; FAST-DAG: ldrsb {{w[0-9]+}}, [sp, #4] +; FAST-DAG: ldrsh {{w[0-9]+}}, [sp, #2] +; FAST-DAG: ldrsb {{w[0-9]+}}, [sp] %conv = sext i8 %a4 to i64 %conv3 = sext i16 %a5 to i64 %conv8 = sext i8 %b1 to i64 @@ -44,10 +45,10 @@ entry: ; CHECK: i8i16caller ; The 8th, 9th, 10th and 11th arguments are passed at sp, sp+2, sp+4, sp+5. ; They are i8, i16, i8 and i8. -; CHECK: strb {{w[0-9]+}}, [sp, #5] -; CHECK: strb {{w[0-9]+}}, [sp, #4] -; CHECK: strh {{w[0-9]+}}, [sp, #2] -; CHECK: strb {{w[0-9]+}}, [sp] +; CHECK-DAG: strb {{w[0-9]+}}, [sp, #5] +; CHECK-DAG: strb {{w[0-9]+}}, [sp, #4] +; CHECK-DAG: strh {{w[0-9]+}}, [sp, #2] +; CHECK-DAG: strb {{w[0-9]+}}, [sp] ; CHECK: bl ; FAST: i8i16caller ; FAST: strb {{w[0-9]+}}, [sp]