[FastISel][AArch64] Fix address simplification.

When a shift with an extension, or an add with a shift and an extension, cannot be folded
into the memory operation, the address computation has to be materialized
separately. While doing so, the code did not account for a possible sign-/zero-
extension of the offset register. This fix also folds the sign-/zero-extension into the
add or shift instruction that is used to materialize the address.

This fixes rdar://problem/18141718.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216511 91177308-0d34-0410-b5e6-96231b3b80d8
Juergen Ributzka 2014-08-27 00:58:30 +00:00
parent 836f4bd090
commit fc03e72b4f
2 changed files with 130 additions and 8 deletions
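
To illustrate the change, here is a minimal, self-contained C++ sketch of the decision the patched SimplifyAddress now makes when the register offset cannot be folded into the load/store itself. The Address struct, Extend enum, and materializeOffset function are hypothetical stand-ins for illustration only; they are not the real FastISel types or APIs.

// Illustration only -- hypothetical stand-ins, not the real LLVM/FastISel types.
#include <cstdint>
#include <cstdio>

enum class Extend { None, SXTW, UXTW };   // stands in for AArch64_AM::ShiftExtendType

struct Address {
  bool HasBase;     // corresponds to Addr.getReg() != 0
  Extend Ext;       // corresponds to Addr.getExtendType()
  uint64_t Shift;   // corresponds to Addr.getShift()
};

// Which instruction materializes the register offset once folding into the
// memory operation has failed (mirrors the patched logic in SimplifyAddress).
const char *materializeOffset(const Address &A) {
  if (A.HasBase) {
    // Base register present: fold the extension into an extended-register add
    // (e.g. "add xN, xN, wM, sxtw #shift"); otherwise use a shifted-register add.
    if (A.Ext == Extend::SXTW || A.Ext == Extend::UXTW)
      return "extended-register ADD (ADDXrx)";
    return "shifted-register ADD (ADDXrs)";
  }
  // No base register: fold the extension into the shift that scales the
  // 32-bit offset, e.g. "sbfiz xN, xN, #shift, #32" for a sign extension.
  if (A.Ext == Extend::SXTW)
    return "sign-extending shift of the 32-bit offset";
  if (A.Ext == Extend::UXTW)
    return "zero-extending shift of the 32-bit offset";
  return "plain 64-bit LSL of the offset";
}

int main() {
  Address WithBase{true, Extend::SXTW, 3};
  Address NoBase{false, Extend::SXTW, 3};
  std::printf("%s\n%s\n", materializeOffset(WithBase), materializeOffset(NoBase));
  return 0;
}

The two new test cases below exercise exactly these two paths: load_sext_shift_offreg_imm1 takes the shift-only path (sbfiz), and load_breg_sext_shift_offreg_imm1 takes the extended-register add path.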

lib/Target/AArch64/AArch64FastISel.cpp

@@ -156,10 +156,19 @@ private:
   unsigned emitAddsSubs_ri(bool UseAdds, MVT RetVT, unsigned LHSReg,
                            bool LHSIsKill, uint64_t Imm,
                            bool WantResult = true);
+  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
+                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
+                         AArch64_AM::ShiftExtendType ShiftType,
+                         uint64_t ShiftImm, bool WantResult = true);
   unsigned emitAddsSubs_rs(bool UseAdds, MVT RetVT, unsigned LHSReg,
                            bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                            AArch64_AM::ShiftExtendType ShiftType,
                            uint64_t ShiftImm, bool WantResult = true);
+  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
+                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
+                         AArch64_AM::ShiftExtendType ExtType,
+                         uint64_t ShiftImm, bool WantResult = true);
   unsigned emitAddsSubs_rx(bool UseAdds, MVT RetVT, unsigned LHSReg,
                            bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                            AArch64_AM::ShiftExtendType ExtType,
@@ -715,20 +724,38 @@ bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT) {
   if (RegisterOffsetNeedsLowering) {
     unsigned ResultReg = 0;
-    if (Addr.getReg())
-      ResultReg = FastEmitInst_rri(AArch64::ADDXrs, &AArch64::GPR64RegClass,
-                                   Addr.getReg(), /*TODO:IsKill=*/false,
-                                   Addr.getOffsetReg(), /*TODO:IsKill=*/false,
-                                   Addr.getShift());
-    else
-      ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
-                             /*Op0IsKill=*/false, Addr.getShift());
+    if (Addr.getReg()) {
+      if (Addr.getExtendType() == AArch64_AM::SXTW ||
+          Addr.getExtendType() == AArch64_AM::UXTW )
+        ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
+                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
+                                  /*TODO:IsKill=*/false, Addr.getExtendType(),
+                                  Addr.getShift());
+      else
+        ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
+                                  /*TODO:IsKill=*/false, Addr.getOffsetReg(),
+                                  /*TODO:IsKill=*/false, AArch64_AM::LSL,
+                                  Addr.getShift());
+    } else {
+      if (Addr.getExtendType() == AArch64_AM::UXTW)
+        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
+                               /*Op0IsKill=*/false, Addr.getShift(),
+                               /*IsZExt=*/true);
+      else if (Addr.getExtendType() == AArch64_AM::SXTW)
+        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
+                               /*Op0IsKill=*/false, Addr.getShift(),
+                               /*IsZExt=*/false);
+      else
+        ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
+                               /*Op0IsKill=*/false, Addr.getShift());
+    }
     if (!ResultReg)
       return false;
 
     Addr.setReg(ResultReg);
     Addr.setOffsetReg(0);
     Addr.setShift(0);
+    Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
   }
 
   // Since the offset is too large for the load/store instruction get the
@@ -978,6 +1005,40 @@ unsigned AArch64FastISel::emitAddsSubs_ri(bool UseAdds, MVT RetVT,
   return ResultReg;
 }
+
+unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT,
+                                        unsigned LHSReg, bool LHSIsKill,
+                                        unsigned RHSReg, bool RHSIsKill,
+                                        AArch64_AM::ShiftExtendType ShiftType,
+                                        uint64_t ShiftImm, bool WantResult) {
+  assert(LHSReg && RHSReg && "Invalid register number.");
+
+  if (RetVT != MVT::i32 && RetVT != MVT::i64)
+    return 0;
+
+  static const unsigned OpcTable[2][2] = {
+    { AArch64::ADDWrs, AArch64::ADDXrs },
+    { AArch64::SUBWrs, AArch64::SUBXrs }
+  };
+  unsigned Opc = OpcTable[!UseAdd][(RetVT == MVT::i64)];
+  unsigned ResultReg;
+  if (WantResult) {
+    const TargetRegisterClass *RC =
+        (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+    ResultReg = createResultReg(RC);
+  } else
+    ResultReg = (RetVT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
+
+  const MCInstrDesc &II = TII.get(Opc);
+  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
+  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+      .addReg(LHSReg, getKillRegState(LHSIsKill))
+      .addReg(RHSReg, getKillRegState(RHSIsKill))
+      .addImm(getShifterImm(ShiftType, ShiftImm));
+  return ResultReg;
+}
+
 unsigned AArch64FastISel::emitAddsSubs_rs(bool UseAdds, MVT RetVT,
                                           unsigned LHSReg, bool LHSIsKill,
                                           unsigned RHSReg, bool RHSIsKill,
@@ -1012,6 +1073,40 @@ unsigned AArch64FastISel::emitAddsSubs_rs(bool UseAdds, MVT RetVT,
   return ResultReg;
 }
+
+unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT,
+                                        unsigned LHSReg, bool LHSIsKill,
+                                        unsigned RHSReg, bool RHSIsKill,
+                                        AArch64_AM::ShiftExtendType ExtType,
+                                        uint64_t ShiftImm, bool WantResult) {
+  assert(LHSReg && RHSReg && "Invalid register number.");
+
+  if (RetVT != MVT::i32 && RetVT != MVT::i64)
+    return 0;
+
+  static const unsigned OpcTable[2][2] = {
+    { AArch64::ADDWrx, AArch64::ADDXrx },
+    { AArch64::SUBWrx, AArch64::SUBXrx }
+  };
+  unsigned Opc = OpcTable[!UseAdd][(RetVT == MVT::i64)];
+  unsigned ResultReg;
+  if (WantResult) {
+    const TargetRegisterClass *RC =
+        (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
+    ResultReg = createResultReg(RC);
+  } else
+    ResultReg = (RetVT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
+
+  const MCInstrDesc &II = TII.get(Opc);
+  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
+  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+      .addReg(LHSReg, getKillRegState(LHSIsKill))
+      .addReg(RHSReg, getKillRegState(RHSIsKill))
+      .addImm(getArithExtendImm(ExtType, ShiftImm));
+  return ResultReg;
+}
+
 unsigned AArch64FastISel::emitAddsSubs_rx(bool UseAdds, MVT RetVT,
                                           unsigned LHSReg, bool LHSIsKill,
                                           unsigned RHSReg, bool RHSIsKill,

test/CodeGen/AArch64/fast-isel-addressing-modes.ll

@@ -423,3 +423,30 @@ define i32 @load_breg_sext_shift_offreg_2(i32 %a, i64 %b) {
   ret i32 %5
 }
+
+; Load Scaled Register Offset + Immediate Offset + Sign/Zero extension
+define i64 @load_sext_shift_offreg_imm1(i32 %a) {
+; CHECK-LABEL: load_sext_shift_offreg_imm1
+; CHECK:       sbfiz [[REG:x[0-9]+]], x0, #3, #32
+; CHECK-NEXT:  ldr {{x[0-9]+}}, {{\[}}[[REG]], #8{{\]}}
+  %1 = sext i32 %a to i64
+  %2 = shl i64 %1, 3
+  %3 = add i64 %2, 8
+  %4 = inttoptr i64 %3 to i64*
+  %5 = load i64* %4
+  ret i64 %5
+}
+
+; Load Base Register + Scaled Register Offset + Immediate Offset + Sign/Zero extension
+define i64 @load_breg_sext_shift_offreg_imm1(i32 %a, i64 %b) {
+; CHECK-LABEL: load_breg_sext_shift_offreg_imm1
+; CHECK:       add [[REG:x[0-9]+]], x1, w0, sxtw #3
+; CHECK-NEXT:  ldr {{x[0-9]+}}, {{\[}}[[REG]], #8{{\]}}
+  %1 = sext i32 %a to i64
+  %2 = shl i64 %1, 3
+  %3 = add i64 %b, %2
+  %4 = add i64 %3, 8
+  %5 = inttoptr i64 %4 to i64*
+  %6 = load i64* %5
+  ret i64 %6
+}