From ca07e256f62f772d14c42f41af46f2aeacc54983 Mon Sep 17 00:00:00 2001 From: Juergen Ributzka Date: Tue, 7 Oct 2014 03:39:59 +0000 Subject: [PATCH] [FastISel][AArch64] Fix "Fold sign-/zero-extends into the load instruction." This commit fixes an issue with sign-/zero-extending loads that was discovered by Richard Barton. We now use the correct load instructions for sign-extending loads to 64bit. Also updated and added more unit tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219185 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AArch64/AArch64FastISel.cpp | 154 ++++--- test/CodeGen/AArch64/fast-isel-int-ext.ll | 465 ++++++++++++++++++---- 2 files changed, 478 insertions(+), 141 deletions(-) diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index bf3d18343d1..504049bbafd 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -178,8 +178,8 @@ private: bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt); bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm); bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS); - bool emitLoad(MVT VT, unsigned &ResultReg, Address Addr, bool WantZExt = true, - MachineMemOperand *MMO = nullptr); + bool emitLoad(MVT VT, MVT ResultVT, unsigned &ResultReg, Address Addr, + bool WantZExt = true, MachineMemOperand *MMO = nullptr); bool emitStore(MVT VT, unsigned SrcReg, Address Addr, MachineMemOperand *MMO = nullptr); unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt); @@ -260,6 +260,8 @@ public: static bool isIntExtFree(const Instruction *I) { assert((isa(I) || isa(I)) && "Unexpected integer extend instruction."); + assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() && + "Unexpected value type."); bool IsZExt = isa(I); if (const auto *LI = dyn_cast(I->getOperand(0))) @@ -1589,8 +1591,9 @@ unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned 
LHSReg, bool LHSIsKill, return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm); } -bool AArch64FastISel::emitLoad(MVT VT, unsigned &ResultReg, Address Addr, - bool WantZExt, MachineMemOperand *MMO) { +bool AArch64FastISel::emitLoad(MVT VT, MVT RetVT, unsigned &ResultReg, + Address Addr, bool WantZExt, + MachineMemOperand *MMO) { // Simplify this down to something we can handle. if (!simplifyAddress(Addr, VT)) return false; @@ -1607,24 +1610,40 @@ bool AArch64FastISel::emitLoad(MVT VT, unsigned &ResultReg, Address Addr, ScaleFactor = 1; } - static const unsigned GPOpcTable[2][4][4] = { + static const unsigned GPOpcTable[2][8][4] = { // Sign-extend. - { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURSWi, + { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi, AArch64::LDURXi }, - { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRSWui, + { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi, + AArch64::LDURXi }, + { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui, AArch64::LDRXui }, - { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRSWroX, + { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui, + AArch64::LDRXui }, + { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX, AArch64::LDRXroX }, - { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRSWroW, + { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX, + AArch64::LDRXroX }, + { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW, AArch64::LDRXroW }, + { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW, + AArch64::LDRXroW } }, // Zero-extend. 
{ { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, AArch64::LDURXi }, + { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, + AArch64::LDURXi }, + { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, + AArch64::LDRXui }, { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, AArch64::LDRXui }, { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, AArch64::LDRXroX }, + { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, + AArch64::LDRXroX }, + { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, + AArch64::LDRXroW }, { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, AArch64::LDRXroW } } @@ -1646,24 +1665,28 @@ bool AArch64FastISel::emitLoad(MVT VT, unsigned &ResultReg, Address Addr, Addr.getExtendType() == AArch64_AM::SXTW) Idx++; + bool IsRet64Bit = RetVT == MVT::i64; switch (VT.SimpleTy) { default: llvm_unreachable("Unexpected value type."); case MVT::i1: // Intentional fall-through. case MVT::i8: - Opc = GPOpcTable[WantZExt][Idx][0]; - RC = &AArch64::GPR32RegClass; + Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0]; + RC = (IsRet64Bit && !WantZExt) ? + &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; break; case MVT::i16: - Opc = GPOpcTable[WantZExt][Idx][1]; - RC = &AArch64::GPR32RegClass; + Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1]; + RC = (IsRet64Bit && !WantZExt) ? + &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; break; case MVT::i32: - Opc = GPOpcTable[WantZExt][Idx][2]; - RC = WantZExt ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass; + Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2]; + RC = (IsRet64Bit && !WantZExt) ? 
+ &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; break; case MVT::i64: - Opc = GPOpcTable[WantZExt][Idx][3]; + Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3]; RC = &AArch64::GPR64RegClass; break; case MVT::f32: @@ -1682,15 +1705,22 @@ bool AArch64FastISel::emitLoad(MVT VT, unsigned &ResultReg, Address Addr, TII.get(Opc), ResultReg); addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO); - // For 32bit loads we do sign-extending loads to 64bit and then extract the - // subreg. In the end this is just a NOOP. - if (VT == MVT::i32 && !WantZExt) - ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg, /*IsKill=*/true, - AArch64::sub_32); + // For zero-extending loads to 64bit we emit a 32bit load and then convert + // the w-reg to an x-reg. In the end this is just a no-op and will be removed. + if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) { + unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(AArch64::SUBREG_TO_REG), Reg64) + .addImm(0) + .addReg(ResultReg, getKillRegState(true)) + .addImm(AArch64::sub_32); + ResultReg = Reg64; + } // Loading an i1 requires special handling. if (VT == MVT::i1) { - unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1); + unsigned ANDReg = emitAnd_ri(IsRet64Bit ? 
MVT::i64 : MVT::i32, ResultReg, + /*IsKill=*/true, 1); assert(ANDReg && "Unexpected AND instruction emission failure."); ResultReg = ANDReg; } @@ -1767,11 +1797,21 @@ bool AArch64FastISel::selectLoad(const Instruction *I) { return false; bool WantZExt = true; - if (I->hasOneUse() && isa(I->use_begin()->getUser())) - WantZExt = false; + MVT RetVT = VT; + if (I->hasOneUse()) { + if (const auto *ZE = dyn_cast(I->use_begin()->getUser())) { + if (!isTypeSupported(ZE->getType(), RetVT, /*IsVectorAllowed=*/false)) + RetVT = VT; + } else if (const auto *SE = dyn_cast(I->use_begin()->getUser())) { + if (!isTypeSupported(SE->getType(), RetVT, /*IsVectorAllowed=*/false)) + RetVT = VT; + WantZExt = false; + } + } unsigned ResultReg; - if (!emitLoad(VT, ResultReg, Addr, WantZExt, createMachineMemOperandFor(I))) + if (!emitLoad(VT, RetVT, ResultReg, Addr, WantZExt, + createMachineMemOperandFor(I))) return false; updateValueMap(I, ResultReg); @@ -2897,7 +2937,7 @@ bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src, bool RV; unsigned ResultReg; - RV = emitLoad(VT, ResultReg, Src); + RV = emitLoad(VT, VT, ResultReg, Src); if (!RV) return false; @@ -3917,51 +3957,37 @@ bool AArch64FastISel::selectIntExt(const Instruction *I) { if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT)) return false; - if (isIntExtFree(I)) { - unsigned SrcReg = getRegForValue(I->getOperand(0)); - if (!SrcReg) - return false; - bool SrcIsKill = hasTrivialKill(I->getOperand(0)); - - const TargetRegisterClass *RC = (RetVT == MVT::i64) ? 
- &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; - unsigned ResultReg = createResultReg(RC); - if (RetVT == MVT::i64 && SrcVT != MVT::i64) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(AArch64::SUBREG_TO_REG), ResultReg) - .addImm(0) - .addReg(SrcReg, getKillRegState(SrcIsKill)) - .addImm(AArch64::sub_32); - } else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg) - .addReg(SrcReg, getKillRegState(SrcIsKill)); - } - updateValueMap(I, ResultReg); - return true; - } - unsigned SrcReg = getRegForValue(I->getOperand(0)); if (!SrcReg) return false; - bool SrcRegIsKill = hasTrivialKill(I->getOperand(0)); + bool SrcIsKill = hasTrivialKill(I->getOperand(0)); - unsigned ResultReg = 0; - if (isIntExtFree(I)) { - if (RetVT == MVT::i64) { - ResultReg = createResultReg(&AArch64::GPR64RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(AArch64::SUBREG_TO_REG), ResultReg) - .addImm(0) - .addReg(SrcReg, getKillRegState(SrcRegIsKill)) - .addImm(AArch64::sub_32); - } else - ResultReg = SrcReg; + // The load instruction selection code handles the sign-/zero-extension. 
+ if (const auto *LI = dyn_cast(I->getOperand(0))) { + if (LI->hasOneUse()) { + updateValueMap(I, SrcReg); + return true; + } } - if (!ResultReg) - ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, isa(I)); + bool IsZExt = isa(I); + if (const auto *Arg = dyn_cast(I->getOperand(0))) { + if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) { + if (RetVT == MVT::i64 && SrcVT != MVT::i64) { + unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(AArch64::SUBREG_TO_REG), ResultReg) + .addImm(0) + .addReg(SrcReg, getKillRegState(SrcIsKill)) + .addImm(AArch64::sub_32); + SrcReg = ResultReg; + } + updateValueMap(I, SrcReg); + return true; + } + } + unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt); if (!ResultReg) return false; diff --git a/test/CodeGen/AArch64/fast-isel-int-ext.ll b/test/CodeGen/AArch64/fast-isel-int-ext.ll index 115b96d7806..31372ee35bd 100644 --- a/test/CodeGen/AArch64/fast-isel-int-ext.ll +++ b/test/CodeGen/AArch64/fast-isel-int-ext.ll @@ -6,9 +6,9 @@ ; ; SHIFT ; -define i64 @load_addr_shift_zext1(i32 zeroext %a, i64 %b) { +define i64 @load_addr_shift_zext1(i32 %a, i64 %b) { ; CHECK-LABEL: load_addr_shift_zext1 -; CHECK: ldr {{x[0-9]+}}, [x1, x0, lsl #3] +; CHECK: ldr {{x[0-9]+}}, [x1, w0, uxtw #3] %1 = zext i32 %a to i64 %2 = shl i64 %1, 3 %3 = add i64 %b, %2 @@ -17,9 +17,9 @@ define i64 @load_addr_shift_zext1(i32 zeroext %a, i64 %b) { ret i64 %5 } -define i64 @load_addr_shift_zext2(i32 signext %a, i64 %b) { +define i64 @load_addr_shift_zext2(i32 zeroext %a, i64 %b) { ; CHECK-LABEL: load_addr_shift_zext2 -; CHECK: ldr {{x[0-9]+}}, [x1, w0, uxtw #3{{\]}} +; CHECK: ldr {{x[0-9]+}}, [x1, x0, lsl #3] %1 = zext i32 %a to i64 %2 = shl i64 %1, 3 %3 = add i64 %b, %2 @@ -28,9 +28,20 @@ define i64 @load_addr_shift_zext2(i32 signext %a, i64 %b) { ret i64 %5 } -define i64 @load_addr_shift_sext1(i32 signext %a, i64 %b) { +define i64 @load_addr_shift_zext3(i32 signext 
%a, i64 %b) { +; CHECK-LABEL: load_addr_shift_zext3 +; CHECK: ldr {{x[0-9]+}}, [x1, w0, uxtw #3] + %1 = zext i32 %a to i64 + %2 = shl i64 %1, 3 + %3 = add i64 %b, %2 + %4 = inttoptr i64 %3 to i64* + %5 = load i64* %4 + ret i64 %5 +} + +define i64 @load_addr_shift_sext1(i32 %a, i64 %b) { ; CHECK-LABEL: load_addr_shift_sext1 -; CHECK: ldr {{x[0-9]+}}, [x1, x0, lsl #3] +; CHECK: ldr {{x[0-9]+}}, [x1, w0, sxtw #3] %1 = sext i32 %a to i64 %2 = shl i64 %1, 3 %3 = add i64 %b, %2 @@ -50,22 +61,22 @@ define i64 @load_addr_shift_sext2(i32 zeroext %a, i64 %b) { ret i64 %5 } -; -; MUL -; -define i64 @load_addr_mul_zext1(i32 zeroext %a, i64 %b) { -; CHECK-LABEL: load_addr_mul_zext1 +define i64 @load_addr_shift_sext3(i32 signext %a, i64 %b) { +; CHECK-LABEL: load_addr_shift_sext3 ; CHECK: ldr {{x[0-9]+}}, [x1, x0, lsl #3] - %1 = zext i32 %a to i64 - %2 = mul i64 %1, 8 + %1 = sext i32 %a to i64 + %2 = shl i64 %1, 3 %3 = add i64 %b, %2 %4 = inttoptr i64 %3 to i64* %5 = load i64* %4 ret i64 %5 } -define i64 @load_addr_mul_zext2(i32 signext %a, i64 %b) { -; CHECK-LABEL: load_addr_mul_zext2 +; +; MUL +; +define i64 @load_addr_mul_zext1(i32 %a, i64 %b) { +; CHECK-LABEL: load_addr_mul_zext1 ; CHECK: ldr {{x[0-9]+}}, [x1, w0, uxtw #3] %1 = zext i32 %a to i64 %2 = mul i64 %1, 8 @@ -75,9 +86,31 @@ define i64 @load_addr_mul_zext2(i32 signext %a, i64 %b) { ret i64 %5 } -define i64 @load_addr_mul_sext1(i32 signext %a, i64 %b) { -; CHECK-LABEL: load_addr_mul_sext1 +define i64 @load_addr_mul_zext2(i32 zeroext %a, i64 %b) { +; CHECK-LABEL: load_addr_mul_zext2 ; CHECK: ldr {{x[0-9]+}}, [x1, x0, lsl #3] + %1 = zext i32 %a to i64 + %2 = mul i64 %1, 8 + %3 = add i64 %b, %2 + %4 = inttoptr i64 %3 to i64* + %5 = load i64* %4 + ret i64 %5 +} + +define i64 @load_addr_mul_zext3(i32 signext %a, i64 %b) { +; CHECK-LABEL: load_addr_mul_zext3 +; CHECK: ldr {{x[0-9]+}}, [x1, w0, uxtw #3] + %1 = zext i32 %a to i64 + %2 = mul i64 %1, 8 + %3 = add i64 %b, %2 + %4 = inttoptr i64 %3 to i64* + %5 = load i64* %4 + 
ret i64 %5 +} + +define i64 @load_addr_mul_sext1(i32 %a, i64 %b) { +; CHECK-LABEL: load_addr_mul_sext1 +; CHECK: ldr {{x[0-9]+}}, [x1, w0, sxtw #3] %1 = sext i32 %a to i64 %2 = mul i64 %1, 8 %3 = add i64 %b, %2 @@ -97,94 +130,372 @@ define i64 @load_addr_mul_sext2(i32 zeroext %a, i64 %b) { ret i64 %5 } +define i64 @load_addr_mul_sext3(i32 signext %a, i64 %b) { +; CHECK-LABEL: load_addr_mul_sext3 +; CHECK: ldr {{x[0-9]+}}, [x1, x0, lsl #3] + %1 = sext i32 %a to i64 + %2 = mul i64 %1, 8 + %3 = add i64 %b, %2 + %4 = inttoptr i64 %3 to i64* + %5 = load i64* %4 + ret i64 %5 +} + + +; ; Test folding of the sign-/zero-extend into the load instruction. -define i32 @load_zext_i8_to_i32(i8* %a) { -; CHECK-LABEL: load_zext_i8_to_i32 -; CHECK: ldrb w0, [x0] +; + +; Unscaled +define i32 @load_unscaled_zext_i8_to_i32(i64 %a) { +; CHECK-LABEL: load_unscaled_zext_i8_to_i32 +; CHECK: ldurb w0, [x0, #-8] ; CHECK-NOT: uxtb - %1 = load i8* %a - %2 = zext i8 %1 to i32 - ret i32 %2 + %1 = add i64 %a, -8 + %2 = inttoptr i64 %1 to i8* + %3 = load i8* %2 + %4 = zext i8 %3 to i32 + ret i32 %4 } -define i32 @load_zext_i16_to_i32(i16* %a) { -; CHECK-LABEL: load_zext_i16_to_i32 -; CHECK: ldrh w0, [x0] +define i32 @load_unscaled_zext_i16_to_i32(i64 %a) { +; CHECK-LABEL: load_unscaled_zext_i16_to_i32 +; CHECK: ldurh w0, [x0, #-8] ; CHECK-NOT: uxth - %1 = load i16* %a - %2 = zext i16 %1 to i32 - ret i32 %2 + %1 = add i64 %a, -8 + %2 = inttoptr i64 %1 to i16* + %3 = load i16* %2 + %4 = zext i16 %3 to i32 + ret i32 %4 } -define i64 @load_zext_i8_to_i64(i8* %a) { -; CHECK-LABEL: load_zext_i8_to_i64 -; CHECK: ldrb w0, [x0] +define i64 @load_unscaled_zext_i8_to_i64(i64 %a) { +; CHECK-LABEL: load_unscaled_zext_i8_to_i64 +; CHECK: ldurb w0, [x0, #-8] ; CHECK-NOT: uxtb - %1 = load i8* %a - %2 = zext i8 %1 to i64 - ret i64 %2 + %1 = add i64 %a, -8 + %2 = inttoptr i64 %1 to i8* + %3 = load i8* %2 + %4 = zext i8 %3 to i64 + ret i64 %4 } -define i64 @load_zext_i16_to_i64(i16* %a) { -; CHECK-LABEL: 
load_zext_i16_to_i64 -; CHECK: ldrh w0, [x0] +define i64 @load_unscaled_zext_i16_to_i64(i64 %a) { +; CHECK-LABEL: load_unscaled_zext_i16_to_i64 +; CHECK: ldurh w0, [x0, #-8] ; CHECK-NOT: uxth - %1 = load i16* %a - %2 = zext i16 %1 to i64 - ret i64 %2 + %1 = add i64 %a, -8 + %2 = inttoptr i64 %1 to i16* + %3 = load i16* %2 + %4 = zext i16 %3 to i64 + ret i64 %4 } -define i64 @load_zext_i32_to_i64(i32* %a) { -; CHECK-LABEL: load_zext_i32_to_i64 -; CHECK: ldr w0, [x0] +define i64 @load_unscaled_zext_i32_to_i64(i64 %a) { +; CHECK-LABEL: load_unscaled_zext_i32_to_i64 +; CHECK: ldur w0, [x0, #-8] ; CHECK-NOT: uxtw - %1 = load i32* %a - %2 = zext i32 %1 to i64 - ret i64 %2 + %1 = add i64 %a, -8 + %2 = inttoptr i64 %1 to i32* + %3 = load i32* %2 + %4 = zext i32 %3 to i64 + ret i64 %4 } -define i32 @load_sext_i8_to_i32(i8* %a) { -; CHECK-LABEL: load_sext_i8_to_i32 -; CHECK: ldrsb w0, [x0] +define i32 @load_unscaled_sext_i8_to_i32(i64 %a) { +; CHECK-LABEL: load_unscaled_sext_i8_to_i32 +; CHECK: ldursb w0, [x0, #-8] ; CHECK-NOT: sxtb - %1 = load i8* %a - %2 = sext i8 %1 to i32 - ret i32 %2 + %1 = add i64 %a, -8 + %2 = inttoptr i64 %1 to i8* + %3 = load i8* %2 + %4 = sext i8 %3 to i32 + ret i32 %4 } -define i32 @load_sext_i16_to_i32(i16* %a) { -; CHECK-LABEL: load_sext_i16_to_i32 -; CHECK: ldrsh w0, [x0] +define i32 @load_unscaled_sext_i16_to_i32(i64 %a) { +; CHECK-LABEL: load_unscaled_sext_i16_to_i32 +; CHECK: ldursh w0, [x0, #-8] ; CHECK-NOT: sxth - %1 = load i16* %a - %2 = sext i16 %1 to i32 - ret i32 %2 + %1 = add i64 %a, -8 + %2 = inttoptr i64 %1 to i16* + %3 = load i16* %2 + %4 = sext i16 %3 to i32 + ret i32 %4 } -define i64 @load_sext_i8_to_i64(i8* %a) { -; CHECK-LABEL: load_sext_i8_to_i64 -; CHECK: ldrsb w0, [x0] +define i64 @load_unscaled_sext_i8_to_i64(i64 %a) { +; CHECK-LABEL: load_unscaled_sext_i8_to_i64 +; CHECK: ldursb x0, [x0, #-8] ; CHECK-NOT: sxtb - %1 = load i8* %a - %2 = sext i8 %1 to i64 - ret i64 %2 + %1 = add i64 %a, -8 + %2 = inttoptr i64 %1 to i8* + %3 
= load i8* %2 + %4 = sext i8 %3 to i64 + ret i64 %4 } -define i64 @load_sext_i16_to_i64(i16* %a) { -; CHECK-LABEL: load_sext_i16_to_i64 -; CHECK: ldrsh w0, [x0] +define i64 @load_unscaled_sext_i16_to_i64(i64 %a) { +; CHECK-LABEL: load_unscaled_sext_i16_to_i64 +; CHECK: ldursh x0, [x0, #-8] ; CHECK-NOT: sxth - %1 = load i16* %a - %2 = sext i16 %1 to i64 - ret i64 %2 + %1 = add i64 %a, -8 + %2 = inttoptr i64 %1 to i16* + %3 = load i16* %2 + %4 = sext i16 %3 to i64 + ret i64 %4 } -define i64 @load_sext_i32_to_i64(i32* %a) { -; CHECK-LABEL: load_sext_i32_to_i64 -; CHECK: ldrsw x0, [x0] +define i64 @load_unscaled_sext_i32_to_i64(i64 %a) { +; CHECK-LABEL: load_unscaled_sext_i32_to_i64 +; CHECK: ldursw x0, [x0, #-8] ; CHECK-NOT: sxtw - %1 = load i32* %a - %2 = sext i32 %1 to i64 - ret i64 %2 + %1 = add i64 %a, -8 + %2 = inttoptr i64 %1 to i32* + %3 = load i32* %2 + %4 = sext i32 %3 to i64 + ret i64 %4 +} + +; Register +define i32 @load_register_zext_i8_to_i32(i64 %a, i64 %b) { +; CHECK-LABEL: load_register_zext_i8_to_i32 +; CHECK: ldrb w0, [x0, x1] +; CHECK-NOT: uxtb + %1 = add i64 %a, %b + %2 = inttoptr i64 %1 to i8* + %3 = load i8* %2 + %4 = zext i8 %3 to i32 + ret i32 %4 +} + +define i32 @load_register_zext_i16_to_i32(i64 %a, i64 %b) { +; CHECK-LABEL: load_register_zext_i16_to_i32 +; CHECK: ldrh w0, [x0, x1] +; CHECK-NOT: uxth + %1 = add i64 %a, %b + %2 = inttoptr i64 %1 to i16* + %3 = load i16* %2 + %4 = zext i16 %3 to i32 + ret i32 %4 +} + +define i64 @load_register_zext_i8_to_i64(i64 %a, i64 %b) { +; CHECK-LABEL: load_register_zext_i8_to_i64 +; CHECK: ldrb w0, [x0, x1] +; CHECK-NOT: uxtb + %1 = add i64 %a, %b + %2 = inttoptr i64 %1 to i8* + %3 = load i8* %2 + %4 = zext i8 %3 to i64 + ret i64 %4 +} + +define i64 @load_register_zext_i16_to_i64(i64 %a, i64 %b) { +; CHECK-LABEL: load_register_zext_i16_to_i64 +; CHECK: ldrh w0, [x0, x1] +; CHECK-NOT: uxth + %1 = add i64 %a, %b + %2 = inttoptr i64 %1 to i16* + %3 = load i16* %2 + %4 = zext i16 %3 to i64 + ret i64 %4 +} + 
+define i64 @load_register_zext_i32_to_i64(i64 %a, i64 %b) { +; CHECK-LABEL: load_register_zext_i32_to_i64 +; CHECK: ldr w0, [x0, x1] +; CHECK-NOT: uxtw + %1 = add i64 %a, %b + %2 = inttoptr i64 %1 to i32* + %3 = load i32* %2 + %4 = zext i32 %3 to i64 + ret i64 %4 +} + +define i32 @load_register_sext_i8_to_i32(i64 %a, i64 %b) { +; CHECK-LABEL: load_register_sext_i8_to_i32 +; CHECK: ldrsb w0, [x0, x1] +; CHECK-NOT: sxtb + %1 = add i64 %a, %b + %2 = inttoptr i64 %1 to i8* + %3 = load i8* %2 + %4 = sext i8 %3 to i32 + ret i32 %4 +} + +define i32 @load_register_sext_i16_to_i32(i64 %a, i64 %b) { +; CHECK-LABEL: load_register_sext_i16_to_i32 +; CHECK: ldrsh w0, [x0, x1] +; CHECK-NOT: sxth + %1 = add i64 %a, %b + %2 = inttoptr i64 %1 to i16* + %3 = load i16* %2 + %4 = sext i16 %3 to i32 + ret i32 %4 +} + +define i64 @load_register_sext_i8_to_i64(i64 %a, i64 %b) { +; CHECK-LABEL: load_register_sext_i8_to_i64 +; CHECK: ldrsb x0, [x0, x1] +; CHECK-NOT: sxtb + %1 = add i64 %a, %b + %2 = inttoptr i64 %1 to i8* + %3 = load i8* %2 + %4 = sext i8 %3 to i64 + ret i64 %4 +} + +define i64 @load_register_sext_i16_to_i64(i64 %a, i64 %b) { +; CHECK-LABEL: load_register_sext_i16_to_i64 +; CHECK: ldrsh x0, [x0, x1] +; CHECK-NOT: sxth + %1 = add i64 %a, %b + %2 = inttoptr i64 %1 to i16* + %3 = load i16* %2 + %4 = sext i16 %3 to i64 + ret i64 %4 +} + +define i64 @load_register_sext_i32_to_i64(i64 %a, i64 %b) { +; CHECK-LABEL: load_register_sext_i32_to_i64 +; CHECK: ldrsw x0, [x0, x1] +; CHECK-NOT: sxtw + %1 = add i64 %a, %b + %2 = inttoptr i64 %1 to i32* + %3 = load i32* %2 + %4 = sext i32 %3 to i64 + ret i64 %4 +} + +; Extend +define i32 @load_extend_zext_i8_to_i32(i64 %a, i32 %b) { +; CHECK-LABEL: load_extend_zext_i8_to_i32 +; CHECK: sxtw [[REG:x[0-9]+]], w1 +; CHECK-NEXT: ldrb w0, [x0, [[REG]]] +; CHECK-NOT: uxtb + %1 = sext i32 %b to i64 + %2 = add i64 %a, %1 + %3 = inttoptr i64 %2 to i8* + %4 = load i8* %3 + %5 = zext i8 %4 to i32 + ret i32 %5 +} + +define i32 
@load_extend_zext_i16_to_i32(i64 %a, i32 %b) { +; CHECK-LABEL: load_extend_zext_i16_to_i32 +; CHECK: sxtw [[REG:x[0-9]+]], w1 +; CHECK-NEXT: ldrh w0, [x0, [[REG]]] +; CHECK-NOT: uxth + %1 = sext i32 %b to i64 + %2 = add i64 %a, %1 + %3 = inttoptr i64 %2 to i16* + %4 = load i16* %3 + %5 = zext i16 %4 to i32 + ret i32 %5 +} + +define i64 @load_extend_zext_i8_to_i64(i64 %a, i32 %b) { +; CHECK-LABEL: load_extend_zext_i8_to_i64 +; CHECK: sxtw [[REG:x[0-9]+]], w1 +; CHECK-NEXT: ldrb w0, [x0, [[REG]]] +; CHECK-NOT: uxtb + %1 = sext i32 %b to i64 + %2 = add i64 %a, %1 + %3 = inttoptr i64 %2 to i8* + %4 = load i8* %3 + %5 = zext i8 %4 to i64 + ret i64 %5 +} + +define i64 @load_extend_zext_i16_to_i64(i64 %a, i32 %b) { +; CHECK-LABEL: load_extend_zext_i16_to_i64 +; CHECK: sxtw [[REG:x[0-9]+]], w1 +; CHECK-NEXT: ldrh w0, [x0, [[REG]]] +; CHECK-NOT: uxth + %1 = sext i32 %b to i64 + %2 = add i64 %a, %1 + %3 = inttoptr i64 %2 to i16* + %4 = load i16* %3 + %5 = zext i16 %4 to i64 + ret i64 %5 +} + +define i64 @load_extend_zext_i32_to_i64(i64 %a, i32 %b) { +; CHECK-LABEL: load_extend_zext_i32_to_i64 +; CHECK: sxtw [[REG:x[0-9]+]], w1 +; CHECK-NEXT: ldr w0, [x0, [[REG]]] +; CHECK-NOT: uxtw + %1 = sext i32 %b to i64 + %2 = add i64 %a, %1 + %3 = inttoptr i64 %2 to i32* + %4 = load i32* %3 + %5 = zext i32 %4 to i64 + ret i64 %5 +} + +define i32 @load_extend_sext_i8_to_i32(i64 %a, i32 %b) { +; CHECK-LABEL: load_extend_sext_i8_to_i32 +; CHECK: sxtw [[REG:x[0-9]+]], w1 +; CHECK-NEXT: ldrsb w0, [x0, [[REG]]] +; CHECK-NOT: sxtb + %1 = sext i32 %b to i64 + %2 = add i64 %a, %1 + %3 = inttoptr i64 %2 to i8* + %4 = load i8* %3 + %5 = sext i8 %4 to i32 + ret i32 %5 +} + +define i32 @load_extend_sext_i16_to_i32(i64 %a, i32 %b) { +; CHECK-LABEL: load_extend_sext_i16_to_i32 +; CHECK: sxtw [[REG:x[0-9]+]], w1 +; CHECK-NEXT: ldrsh w0, [x0, [[REG]]] +; CHECK-NOT: sxth + %1 = sext i32 %b to i64 + %2 = add i64 %a, %1 + %3 = inttoptr i64 %2 to i16* + %4 = load i16* %3 + %5 = sext i16 %4 to i32 + ret i32 
%5 +} + +define i64 @load_extend_sext_i8_to_i64(i64 %a, i32 %b) { +; CHECK-LABEL: load_extend_sext_i8_to_i64 +; CHECK: sxtw [[REG:x[0-9]+]], w1 +; CHECK-NEXT: ldrsb x0, [x0, [[REG]]] +; CHECK-NOT: sxtb + %1 = sext i32 %b to i64 + %2 = add i64 %a, %1 + %3 = inttoptr i64 %2 to i8* + %4 = load i8* %3 + %5 = sext i8 %4 to i64 + ret i64 %5 +} + +define i64 @load_extend_sext_i16_to_i64(i64 %a, i32 %b) { +; CHECK-LABEL: load_extend_sext_i16_to_i64 +; CHECK: sxtw [[REG:x[0-9]+]], w1 +; CHECK-NEXT: ldrsh x0, [x0, [[REG]]] +; CHECK-NOT: sxth + %1 = sext i32 %b to i64 + %2 = add i64 %a, %1 + %3 = inttoptr i64 %2 to i16* + %4 = load i16* %3 + %5 = sext i16 %4 to i64 + ret i64 %5 +} + +define i64 @load_extend_sext_i32_to_i64(i64 %a, i32 %b) { +; CHECK-LABEL: load_extend_sext_i32_to_i64 +; CHECK: sxtw [[REG:x[0-9]+]], w1 +; CHECK-NEXT: ldrsw x0, [x0, [[REG]]] +; CHECK-NOT: sxtw + %1 = sext i32 %b to i64 + %2 = add i64 %a, %1 + %3 = inttoptr i64 %2 to i32* + %4 = load i32* %3 + %5 = sext i32 %4 to i64 + ret i64 %5 }