diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 5a3b3658b3a..9018489e6d5 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1644,6 +1644,38 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, return; } + bool hasOVF = + TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ? + ISD::UADDO : ISD::USUBO, + TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); + if (hasOVF) { + SDVTList VTList = DAG.getVTList(NVT, NVT); + TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT); + int RevOpc; + if (N->getOpcode() == ISD::ADD) { + RevOpc = ISD::SUB; + Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps); + Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2)); + } else { + RevOpc = ISD::ADD; + Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps); + Hi = DAG.getNode(ISD::SUB, dl, NVT, makeArrayRef(HiOps, 2)); + } + SDValue OVF = Lo.getValue(1); + + switch (BoolType) { + case TargetLoweringBase::UndefinedBooleanContent: + OVF = DAG.getNode(ISD::AND, dl, NVT, DAG.getConstant(1, dl, NVT), OVF); + // Fallthrough + case TargetLoweringBase::ZeroOrOneBooleanContent: + Hi = DAG.getNode(N->getOpcode(), dl, NVT, Hi, OVF); + break; + case TargetLoweringBase::ZeroOrNegativeOneBooleanContent: + Hi = DAG.getNode(RevOpc, dl, NVT, Hi, OVF); + } + return; + } + if (N->getOpcode() == ISD::ADD) { Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps); Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2)); diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index abbcee523a8..a993cf739a7 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -2771,6 +2771,12 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode( KnownZero, KnownOne, DAG, Depth); break; + case AMDGPUISD::CARRY: + case AMDGPUISD::BORROW: { + KnownZero = APInt::getHighBitsSet(32, 31); + break; + } + case AMDGPUISD::BFE_I32: case AMDGPUISD::BFE_U32: { ConstantSDNode *CWidth = dyn_cast(Op.getOperand(2)); @@ -2813,6 +2819,10 @@ unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode( return Width ? 32 - (Width->getZExtValue() & 0x1f) : 1; } + case AMDGPUISD::CARRY: + case AMDGPUISD::BORROW: + return 31; + default: return 1; } diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index 6bc6ca5bbc5..d231bcac2ab 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -250,6 +250,8 @@ enum { LDEXP, FP_CLASS, DOT4, + CARRY, + BORROW, BFE_U32, // Extract range of bits with zero extension to 32-bits. BFE_I32, // Extract range of bits with sign extension to 32-bits. BFI, // (src0 & src1) | (~src0 & src2) diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td index 901eb5110f2..d72cb1d7f8c 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.td +++ b/lib/Target/R600/AMDGPUInstrInfo.td @@ -136,6 +136,13 @@ def AMDGPUumin3 : SDNode<"AMDGPUISD::UMIN3", AMDGPUDTIntTernaryOp, [/*SDNPCommutative, SDNPAssociative*/] >; +// out = (src0 + src1 > 0xFFFFFFFF) ? 1 : 0 +def AMDGPUcarry : SDNode<"AMDGPUISD::CARRY", SDTIntBinOp, []>; + +// out = (src1 > src0) ? 1 : 0 +def AMDGPUborrow : SDNode<"AMDGPUISD::BORROW", SDTIntBinOp, []>; + + def AMDGPUcvt_f32_ubyte0 : SDNode<"AMDGPUISD::CVT_F32_UBYTE0", SDTIntToFPOp, []>; def AMDGPUcvt_f32_ubyte1 : SDNode<"AMDGPUISD::CVT_F32_UBYTE1", diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h index 485321cd404..57a084e6b3e 100644 --- a/lib/Target/R600/AMDGPUSubtarget.h +++ b/lib/Target/R600/AMDGPUSubtarget.h @@ -185,6 +185,14 @@ public: return (getGeneration() >= EVERGREEN); } + bool hasCARRY() const { + return (getGeneration() >= EVERGREEN); + } + + bool hasBORROW() const { + return (getGeneration() >= EVERGREEN); + } + bool IsIRStructurizerEnabled() const { return EnableIRStructurizer; } diff --git a/lib/Target/R600/EvergreenInstructions.td b/lib/Target/R600/EvergreenInstructions.td index 55601469009..7adcd46fe19 100644 --- a/lib/Target/R600/EvergreenInstructions.td +++ b/lib/Target/R600/EvergreenInstructions.td @@ -335,6 +335,9 @@ defm CUBE_eg : CUBE_Common<0xC0>; def BCNT_INT : R600_1OP_Helper <0xAA, "BCNT_INT", ctpop, VecALU>; +def ADDC_UINT : R600_2OP_Helper <0x52, "ADDC_UINT", AMDGPUcarry>; +def SUBB_UINT : R600_2OP_Helper <0x53, "SUBB_UINT", AMDGPUborrow>; + def FFBH_UINT : R600_1OP_Helper <0xAB, "FFBH_UINT", ctlz_zero_undef, VecALU>; def FFBL_INT : R600_1OP_Helper <0xAC, "FFBL_INT", cttz_zero_undef, VecALU>; diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index e2604b2ed22..0125828e6ed 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -91,6 +91,14 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM, setOperationAction(ISD::SELECT, MVT::v2i32, Expand); setOperationAction(ISD::SELECT, MVT::v4i32, Expand); + // ADD, SUB overflow. + // TODO: turn these into Legal? + if (Subtarget->hasCARRY()) + setOperationAction(ISD::UADDO, MVT::i32, Custom); + + if (Subtarget->hasBORROW()) + setOperationAction(ISD::USUBO, MVT::i32, Custom); + // Expand sign extension of vectors if (!Subtarget->hasBFE()) setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); @@ -163,8 +171,6 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM, setTargetDAGCombine(ISD::SELECT_CC); setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); - setOperationAction(ISD::SUB, MVT::i64, Expand); - // These should be replaced by UDVIREM, but it does not happen automatically // during Type Legalization setOperationAction(ISD::UDIV, MVT::i64, Custom); @@ -585,6 +591,8 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG); case ISD::SRA_PARTS: case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG); + case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY); + case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW); case ISD::FCOS: case ISD::FSIN: return LowerTrig(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); @@ -1078,6 +1086,24 @@ SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi); } +SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG, + unsigned mainop, unsigned ovf) const { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + + SDValue Lo = Op.getOperand(0); + SDValue Hi = Op.getOperand(1); + + SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi); + // Extend sign. + OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF, + DAG.getValueType(MVT::i1)); + + SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi); + + return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF); +} + SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); return DAG.getNode( diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h index 1f570404254..c06d3c4fd30 100644 --- a/lib/Target/R600/R600ISelLowering.h +++ b/lib/Target/R600/R600ISelLowering.h @@ -64,6 +64,8 @@ private: SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSHLParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSRXParts(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerUADDSUBO(SDValue Op, SelectionDAG &DAG, + unsigned mainop, unsigned ovf) const; SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth, SelectionDAG &DAG) const; diff --git a/test/CodeGen/R600/add.ll b/test/CodeGen/R600/add.ll index 70271616796..655e75dbc1a 100644 --- a/test/CodeGen/R600/add.ll +++ b/test/CodeGen/R600/add.ll @@ -62,6 +62,7 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { ; EG: ADD_INT ; EG: ADD_INT ; EG: ADD_INT + ; SI: s_add_i32 ; SI: s_add_i32 ; SI: s_add_i32 @@ -94,6 +95,7 @@ entry: ; EG: ADD_INT ; EG: ADD_INT ; EG: ADD_INT + ; SI: s_add_i32 ; SI: s_add_i32 ; SI: s_add_i32 @@ -120,6 +122,14 @@ entry: ; FUNC-LABEL: {{^}}add64: ; SI: s_add_u32 ; SI: s_addc_u32 + +; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.[XYZW]]] +; EG: MEM_RAT_CACHELESS STORE_RAW [[HI:T[0-9]+\.[XYZW]]] +; EG-DAG: ADD_INT {{[* ]*}}[[LO]] +; EG-DAG: ADDC_UINT +; EG-DAG: ADD_INT +; EG-DAG: ADD_INT {{[* ]*}}[[HI]] +; EG-NOT: SUB define void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) { entry: %0 = add i64 %a, %b @@ -134,6 +144,14 @@ entry: ; FUNC-LABEL: {{^}}add64_sgpr_vgpr: ; SI-NOT: v_addc_u32_e32 s + +; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.[XYZW]]] +; EG: MEM_RAT_CACHELESS STORE_RAW [[HI:T[0-9]+\.[XYZW]]] +; EG-DAG: ADD_INT {{[* ]*}}[[LO]] +; EG-DAG: ADDC_UINT +; EG-DAG: ADD_INT +; EG-DAG: ADD_INT {{[* ]*}}[[HI]] +; EG-NOT: SUB define void @add64_sgpr_vgpr(i64 addrspace(1)* %out, i64 %a, i64 addrspace(1)* %in) { entry: %0 = load i64, i64 addrspace(1)* %in @@ -146,6 +164,14 @@ entry: ; FUNC-LABEL: {{^}}add64_in_branch: ; SI: s_add_u32 ; SI: s_addc_u32 + +; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.[XYZW]]] +; EG: MEM_RAT_CACHELESS STORE_RAW [[HI:T[0-9]+\.[XYZW]]] +; EG-DAG: ADD_INT {{[* ]*}}[[LO]] +; EG-DAG: ADDC_UINT +; EG-DAG: ADD_INT +; EG-DAG: ADD_INT {{[* ]*}}[[HI]] +; EG-NOT: SUB define void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) { entry: %0 = icmp eq i64 %a, 0 diff --git a/test/CodeGen/R600/sub.ll b/test/CodeGen/R600/sub.ll index 03303f595de..b7fba0efa5b 100644 --- a/test/CodeGen/R600/sub.ll +++ b/test/CodeGen/R600/sub.ll @@ -58,11 +58,13 @@ define void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1) ; SI: s_sub_u32 ; SI: s_subb_u32 -; EG-DAG: SETGE_UINT -; EG-DAG: CNDE_INT -; EG-DAG: SUB_INT -; EG-DAG: SUB_INT +; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.[XYZW]]] +; EG: MEM_RAT_CACHELESS STORE_RAW [[HI:T[0-9]+\.[XYZW]]] +; EG-DAG: SUB_INT {{[* ]*}}[[LO]] +; EG-DAG: SUBB_UINT ; EG-DAG: SUB_INT +; EG-DAG: SUB_INT {{[* ]*}}[[HI]] +; EG-NOT: SUB define void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64 %b) nounwind { %result = sub i64 %a, %b store i64 %result, i64 addrspace(1)* %out, align 8 @@ -73,11 +75,13 @@ define void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64 %b) nounwind ; SI: v_sub_i32_e32 ; SI: v_subb_u32_e32 -; EG-DAG: SETGE_UINT -; EG-DAG: CNDE_INT -; EG-DAG: SUB_INT -; EG-DAG: SUB_INT +; EG: MEM_RAT_CACHELESS STORE_RAW [[LO:T[0-9]+\.[XYZW]]] +; EG: MEM_RAT_CACHELESS STORE_RAW [[HI:T[0-9]+\.[XYZW]]] +; EG-DAG: SUB_INT {{[* ]*}}[[LO]] +; EG-DAG: SUBB_UINT ; EG-DAG: SUB_INT +; EG-DAG: SUB_INT {{[* ]*}}[[HI]] +; EG-NOT: SUB define void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) nounwind { %tid = call i32 @llvm.r600.read.tidig.x() readnone %a_ptr = getelementptr i64, i64 addrspace(1)* %inA, i32 %tid diff --git a/test/CodeGen/R600/uaddo.ll b/test/CodeGen/R600/uaddo.ll index 9f383654b70..11438f267ad 100644 --- a/test/CodeGen/R600/uaddo.ll +++ b/test/CodeGen/R600/uaddo.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs< %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs< %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone @@ -9,6 +9,9 @@ declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone ; SI: add ; SI: addc ; SI: addc + +; EG: ADDC_UINT +; EG: ADDC_UINT define void @uaddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind { %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) nounwind %val = extractvalue { i64, i1 } %uadd, 0 @@ -21,6 +24,9 @@ define void @uaddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind { ; FUNC-LABEL: {{^}}s_uaddo_i32: ; SI: s_add_i32 + +; EG: ADDC_UINT +; EG: ADD_INT define void @s_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) nounwind { %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) nounwind %val = extractvalue { i32, i1 } %uadd, 0 @@ -32,6 +38,9 @@ define void @s_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 ; FUNC-LABEL: {{^}}v_uaddo_i32: ; SI: v_add_i32 + +; EG: ADDC_UINT +; EG: ADD_INT define void @v_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { %a = load i32, i32 addrspace(1)* %aptr, align 4 %b = load i32, i32 addrspace(1)* %bptr, align 4 @@ -46,6 +55,9 @@ define void @v_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 ; FUNC-LABEL: {{^}}s_uaddo_i64: ; SI: s_add_u32 ; SI: s_addc_u32 + +; EG: ADDC_UINT +; EG: ADD_INT define void @s_uaddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind { %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) nounwind %val = extractvalue { i64, i1 } %uadd, 0 @@ -58,6 +70,9 @@ define void @s_uaddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 ; FUNC-LABEL: {{^}}v_uaddo_i64: ; SI: v_add_i32 ; SI: v_addc_u32 + +; EG: ADDC_UINT +; EG: ADD_INT define void @v_uaddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind { %a = load i64, i64 addrspace(1)* %aptr, align 4 %b = load i64, i64 addrspace(1)* %bptr, align 4 diff --git a/test/CodeGen/R600/usubo.ll b/test/CodeGen/R600/usubo.ll index a753ca4e979..3c9b1622a07 100644 --- a/test/CodeGen/R600/usubo.ll +++ b/test/CodeGen/R600/usubo.ll @@ -1,11 +1,14 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs< %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs< %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone declare { i64, i1 } @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone ; FUNC-LABEL: {{^}}usubo_i64_zext: + +; EG: SUBB_UINT +; EG: ADDC_UINT define void @usubo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind { %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) nounwind %val = extractvalue { i64, i1 } %usub, 0 @@ -18,6 +21,9 @@ define void @usubo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind { ; FUNC-LABEL: {{^}}s_usubo_i32: ; SI: s_sub_i32 + +; EG-DAG: SUBB_UINT +; EG-DAG: SUB_INT define void @s_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) nounwind { %usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) nounwind %val = extractvalue { i32, i1 } %usub, 0 @@ -29,6 +35,9 @@ define void @s_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 ; FUNC-LABEL: {{^}}v_usubo_i32: ; SI: v_subrev_i32_e32 + +; EG-DAG: SUBB_UINT +; EG-DAG: SUB_INT define void @v_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { %a = load i32, i32 addrspace(1)* %aptr, align 4 %b = load i32, i32 addrspace(1)* %bptr, align 4 @@ -43,6 +52,11 @@ define void @v_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 ; FUNC-LABEL: {{^}}s_usubo_i64: ; SI: s_sub_u32 ; SI: s_subb_u32 + +; EG-DAG: SUBB_UINT +; EG-DAG: SUB_INT +; EG-DAG: SUB_INT +; EG: SUB_INT define void @s_usubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind { %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) nounwind %val = extractvalue { i64, i1 } %usub, 0 @@ -55,6 +69,11 @@ define void @s_usubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 ; FUNC-LABEL: {{^}}v_usubo_i64: ; SI: v_sub_i32 ; SI: v_subb_u32 + +; EG-DAG: SUBB_UINT +; EG-DAG: SUB_INT +; EG-DAG: SUB_INT +; EG: SUB_INT define void @v_usubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind { %a = load i64, i64 addrspace(1)* %aptr, align 4 %b = load i64, i64 addrspace(1)* %bptr, align 4