mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-26 23:32:58 +00:00
Make ARM and Thumb2 32-bit immediate materialization into a single 32-bit pseudo
instruction. This makes it re-materializable. Thumb2 will split it back out into two instructions so the IT pass will generate the right mask. Also, this exposes opportunities to optimize the movw to a 16-bit move. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@82982 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
705428ae4a
commit
5adb66a646
@ -864,7 +864,8 @@ ARMBaseInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
|
||||
/// getInstrPredicate - If instruction is predicated, returns its predicate
|
||||
/// condition, otherwise returns AL. It also returns the condition code
|
||||
/// register by reference.
|
||||
ARMCC::CondCodes llvm::getInstrPredicate(MachineInstr *MI, unsigned &PredReg) {
|
||||
ARMCC::CondCodes
|
||||
llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
|
||||
int PIdx = MI->findFirstPredOperandIdx();
|
||||
if (PIdx == -1) {
|
||||
PredReg = 0;
|
||||
|
@ -296,7 +296,7 @@ bool isJumpTableBranchOpcode(int Opc) {
|
||||
/// getInstrPredicate - If instruction is predicated, returns its predicate
|
||||
/// condition, otherwise returns AL. It also returns the condition code
|
||||
/// register by reference.
|
||||
ARMCC::CondCodes getInstrPredicate(MachineInstr *MI, unsigned &PredReg);
|
||||
ARMCC::CondCodes getInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
|
||||
|
||||
int getMatchingCondBranchOpcode(int Opc);
|
||||
|
||||
|
@ -596,7 +596,8 @@ void Emitter<CodeEmitter>::emitPseudoInstruction(const MachineInstr &MI) {
|
||||
unsigned Opcode = MI.getDesc().Opcode;
|
||||
switch (Opcode) {
|
||||
default:
|
||||
llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction");//FIXME:
|
||||
llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction");
|
||||
// FIXME: Add support for MOVimm32.
|
||||
case TargetInstrInfo::INLINEASM: {
|
||||
// We allow inline assembler nodes with empty bodies - they can
|
||||
// implicitly define registers, which is ok for JIT.
|
||||
|
@ -984,6 +984,11 @@ class T2JTI<dag oops, dag iops, InstrItinClass itin,
|
||||
string asm, list<dag> pattern>
|
||||
: Thumb2XI<oops, iops, AddrModeNone, SizeSpecial, itin, asm, "", pattern>;
|
||||
|
||||
class T2Ix2<dag oops, dag iops, InstrItinClass itin,
|
||||
string opc, string asm, list<dag> pattern>
|
||||
: Thumb2I<oops, iops, AddrModeNone, Size8Bytes, itin, opc, asm, "", pattern>;
|
||||
|
||||
|
||||
// T2Iidxldst - Thumb2 indexed load / store instructions.
|
||||
class T2Iidxldst<dag oops, dag iops, AddrMode am, IndexMode im,
|
||||
InstrItinClass itin,
|
||||
|
@ -96,7 +96,6 @@ reMaterialize(MachineBasicBlock &MBB,
|
||||
|
||||
MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
|
||||
MI->getOperand(0).setReg(DestReg);
|
||||
|
||||
MBB.insert(I, MI);
|
||||
}
|
||||
|
||||
|
@ -100,6 +100,7 @@ def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
|
||||
def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">;
|
||||
def HasV6 : Predicate<"Subtarget->hasV6Ops()">;
|
||||
def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">;
|
||||
def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
|
||||
def HasV7 : Predicate<"Subtarget->hasV7Ops()">;
|
||||
def HasVFP2 : Predicate<"Subtarget->hasVFP2()">;
|
||||
def HasVFP3 : Predicate<"Subtarget->hasVFP3()">;
|
||||
@ -938,7 +939,7 @@ def MOVi16 : AI1<0b1000, (outs GPR:$dst), (ins i32imm:$src),
|
||||
let Inst{25} = 1;
|
||||
}
|
||||
|
||||
let isAsCheapAsAMove = 1, Constraints = "$src = $dst" in
|
||||
let Constraints = "$src = $dst" in
|
||||
def MOVTi16 : AI1<0b1010, (outs GPR:$dst), (ins GPR:$src, i32imm:$imm),
|
||||
DPFrm, IIC_iMOVi,
|
||||
"movt", " $dst, $imm",
|
||||
@ -1510,7 +1511,8 @@ let isReMaterializable = 1 in
|
||||
def MOVi2pieces : AI1x2<(outs GPR:$dst), (ins so_imm2part:$src),
|
||||
Pseudo, IIC_iMOVi,
|
||||
"mov", " $dst, $src",
|
||||
[(set GPR:$dst, so_imm2part:$src)]>;
|
||||
[(set GPR:$dst, so_imm2part:$src)]>,
|
||||
Requires<[IsARM, NoV6T2]>;
|
||||
|
||||
def : ARMPat<(or GPR:$LHS, so_imm2part:$RHS),
|
||||
(ORRri (ORRri GPR:$LHS, (so_imm2part_1 imm:$RHS)),
|
||||
@ -1519,9 +1521,14 @@ def : ARMPat<(xor GPR:$LHS, so_imm2part:$RHS),
|
||||
(EORri (EORri GPR:$LHS, (so_imm2part_1 imm:$RHS)),
|
||||
(so_imm2part_2 imm:$RHS))>;
|
||||
|
||||
def : ARMPat<(i32 imm:$src),
|
||||
(MOVTi16 (MOVi16 (lo16 imm:$src)), (hi16 imm:$src))>,
|
||||
Requires<[IsARM, HasV6T2]>;
|
||||
// 32-bit immediate using movw + movt.
|
||||
// This is a single pseudo instruction to make it re-materializable. Remove
|
||||
// when we can do generalized remat.
|
||||
let isReMaterializable = 1 in
|
||||
def MOVi32imm : AI1x2<(outs GPR:$dst), (ins i32imm:$src), Pseudo, IIC_iMOVi,
|
||||
"movw", " $dst, ${src:lo16}\n\tmovt${p} $dst, ${src:hi16}",
|
||||
[(set GPR:$dst, (i32 imm:$src))]>,
|
||||
Requires<[IsARM, HasV6T2]>;
|
||||
|
||||
// TODO: add,sub,and, 3-instr forms?
|
||||
|
||||
|
@ -649,7 +649,8 @@ let neverHasSideEffects = 1 in
|
||||
def t2MOVr : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr,
|
||||
"mov", ".w $dst, $src", []>;
|
||||
|
||||
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
|
||||
// AddedComplexity to ensure isel tries t2MOVi before t2MOVi16.
|
||||
let isReMaterializable = 1, isAsCheapAsAMove = 1, AddedComplexity = 1 in
|
||||
def t2MOVi : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi,
|
||||
"mov", ".w $dst, $src",
|
||||
[(set GPR:$dst, t2_so_imm:$src)]>;
|
||||
@ -660,10 +661,10 @@ def t2MOVi16 : T2I<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi,
|
||||
[(set GPR:$dst, imm0_65535:$src)]>;
|
||||
|
||||
let Constraints = "$src = $dst" in
|
||||
def t2MOVTi16 : T2sI<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm), IIC_iMOVi,
|
||||
"movt", " $dst, $imm",
|
||||
[(set GPR:$dst,
|
||||
(or (and GPR:$src, 0xffff), lo16AllZero:$imm))]>;
|
||||
def t2MOVTi16 : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm), IIC_iMOVi,
|
||||
"movt", " $dst, $imm",
|
||||
[(set GPR:$dst,
|
||||
(or (and GPR:$src, 0xffff), lo16AllZero:$imm))]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Extend Instructions.
|
||||
@ -1127,7 +1128,10 @@ def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>;
|
||||
def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
|
||||
(t2LEApcrelJT tjumptable:$dst, imm:$id)>;
|
||||
|
||||
// Large immediate handling.
|
||||
|
||||
def : T2Pat<(i32 imm:$src),
|
||||
(t2MOVTi16 (t2MOVi16 (lo16 imm:$src)), (hi16 imm:$src))>;
|
||||
// 32-bit immediate using movw + movt.
|
||||
// This is a single pseudo instruction to make it re-materializable. Remove
|
||||
// when we can do generalized remat.
|
||||
let isReMaterializable = 1 in
|
||||
def t2MOVi32imm : T2Ix2<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi,
|
||||
"movw", " $dst, ${src:lo16}\n\tmovt${p} $dst, ${src:hi16}",
|
||||
[(set GPR:$dst, (i32 imm:$src))]>;
|
||||
|
@ -329,7 +329,14 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
|
||||
break;
|
||||
}
|
||||
case MachineOperand::MO_Immediate: {
|
||||
O << '#' << MO.getImm();
|
||||
int64_t Imm = MO.getImm();
|
||||
if (Modifier) {
|
||||
if (strcmp(Modifier, "lo16") == 0)
|
||||
Imm = Imm & 0xffffLL;
|
||||
else if (strcmp(Modifier, "hi16") == 0)
|
||||
Imm = (Imm & 0xffff0000LL) >> 16;
|
||||
}
|
||||
O << '#' << Imm;
|
||||
break;
|
||||
}
|
||||
case MachineOperand::MO_MachineBasicBlock:
|
||||
|
@ -592,3 +592,11 @@ conditional move:
|
||||
it saves an instruction and a register.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
add/sub/and/or + i32 imm can be simplified by folding part of the immediate
|
||||
into the operation.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
It might be profitable to cse MOVi16 if there are lots of 32-bit immediates
|
||||
with the same bottom half.
|
||||
|
@ -40,12 +40,11 @@ namespace {
|
||||
char Thumb2ITBlockPass::ID = 0;
|
||||
}
|
||||
|
||||
static ARMCC::CondCodes getPredicate(const MachineInstr *MI,
|
||||
const Thumb2InstrInfo *TII) {
|
||||
static ARMCC::CondCodes getPredicate(const MachineInstr *MI, unsigned &PredReg){
|
||||
unsigned Opc = MI->getOpcode();
|
||||
if (Opc == ARM::tBcc || Opc == ARM::t2Bcc)
|
||||
return ARMCC::AL;
|
||||
return TII->getPredicate(MI);
|
||||
return llvm::getInstrPredicate(MI, PredReg);
|
||||
}
|
||||
|
||||
bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) {
|
||||
@ -54,14 +53,39 @@ bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) {
|
||||
MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
|
||||
while (MBBI != E) {
|
||||
MachineInstr *MI = &*MBBI;
|
||||
ARMCC::CondCodes CC = getPredicate(MI, TII);
|
||||
DebugLoc dl = MI->getDebugLoc();
|
||||
unsigned PredReg = 0;
|
||||
ARMCC::CondCodes CC = getPredicate(MI, PredReg);
|
||||
|
||||
// Splitting t2MOVi32imm into a pair of t2MOVi16 + t2MOVTi16 here.
|
||||
// The only reason it was a single instruction was so it could be
|
||||
// re-materialized. We want to split it before this and the thumb2
|
||||
// size reduction pass to make sure the IT mask is correct and expose
|
||||
// width reduction opportunities. It doesn't make sense to do this in a
|
||||
// separate pass so here it is.
|
||||
if (MI->getOpcode() == ARM::t2MOVi32imm) {
|
||||
unsigned DstReg = MI->getOperand(0).getReg();
|
||||
bool DstDead = MI->getOperand(0).isDead(); // Is this possible?
|
||||
unsigned Imm = MI->getOperand(1).getImm();
|
||||
unsigned Lo16 = Imm & 0xffff;
|
||||
unsigned Hi16 = (Imm >> 16) & 0xffff;
|
||||
BuildMI(MBB, MBBI, dl, TII->get(ARM::t2MOVi16), DstReg)
|
||||
.addImm(Lo16).addImm(CC).addReg(PredReg);
|
||||
BuildMI(MBB, MBBI, dl, TII->get(ARM::t2MOVTi16))
|
||||
.addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead))
|
||||
.addReg(DstReg).addImm(Hi16).addImm(CC).addReg(PredReg);
|
||||
--MBBI;
|
||||
--MBBI;
|
||||
MI->eraseFromParent();
|
||||
continue;
|
||||
}
|
||||
|
||||
if (CC == ARMCC::AL) {
|
||||
++MBBI;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Insert an IT instruction.
|
||||
DebugLoc dl = MI->getDebugLoc();
|
||||
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(ARM::t2IT))
|
||||
.addImm(CC);
|
||||
++MBBI;
|
||||
@ -70,7 +94,8 @@ bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) {
|
||||
ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
|
||||
unsigned Mask = 0, Pos = 3;
|
||||
while (MBBI != E && Pos) {
|
||||
ARMCC::CondCodes NCC = getPredicate(&*MBBI, TII);
|
||||
unsigned Dummy = 0;
|
||||
ARMCC::CondCodes NCC = getPredicate(&*MBBI, Dummy);
|
||||
if (NCC == OCC) {
|
||||
Mask |= (1 << Pos);
|
||||
} else if (NCC != CC)
|
||||
|
@ -79,6 +79,7 @@ namespace {
|
||||
{ ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 0 },
|
||||
{ ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 0 },
|
||||
{ ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0 },
|
||||
{ ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0 },
|
||||
// FIXME: Do we need the 16-bit 'S' variant?
|
||||
{ ARM::t2MOVr,ARM::tMOVgpr2gpr,0, 0, 0, 0, 0, 1,0, 0 },
|
||||
{ ARM::t2MOVCCr,0, ARM::tMOVCCr, 0, 0, 0, 0, 0,1, 0 },
|
||||
|
@ -1,10 +1,11 @@
|
||||
; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep movt | grep #1234
|
||||
; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep movt | grep #1234
|
||||
; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep movt | grep #1234
|
||||
; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep movt | grep #1234
|
||||
; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep mov | grep movt
|
||||
; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
|
||||
|
||||
define i32 @t2MOVTi16_ok_1(i32 %a) {
|
||||
; CHECK: t2MOVTi16_ok_1:
|
||||
; CHECK: movs r1, #0
|
||||
; CHECK-NEXT: movt r1, #1234
|
||||
; CHECK: movw r1, #65535
|
||||
; CHECK-NEXT: movt r1, #1234
|
||||
%1 = and i32 %a, 65535
|
||||
%2 = shl i32 1234, 16
|
||||
%3 = or i32 %1, %2
|
||||
@ -13,6 +14,11 @@ define i32 @t2MOVTi16_ok_1(i32 %a) {
|
||||
}
|
||||
|
||||
define i32 @t2MOVTi16_test_1(i32 %a) {
|
||||
; CHECK: t2MOVTi16_test_1:
|
||||
; CHECK: movs r1, #0
|
||||
; CHECK-NEXT: movt r1, #1234
|
||||
; CHECK: movw r1, #65535
|
||||
; CHECK-NEXT: movt r1, #1234
|
||||
%1 = shl i32 255, 8
|
||||
%2 = shl i32 1234, 8
|
||||
%3 = or i32 %1, 255 ; This give us 0xFFFF in %3
|
||||
@ -24,6 +30,11 @@ define i32 @t2MOVTi16_test_1(i32 %a) {
|
||||
}
|
||||
|
||||
define i32 @t2MOVTi16_test_2(i32 %a) {
|
||||
; CHECK: t2MOVTi16_test_2:
|
||||
; CHECK: movs r1, #0
|
||||
; CHECK-NEXT: movt r1, #1234
|
||||
; CHECK: movw r1, #65535
|
||||
; CHECK-NEXT: movt r1, #1234
|
||||
%1 = shl i32 255, 8
|
||||
%2 = shl i32 1234, 8
|
||||
%3 = or i32 %1, 255 ; This give us 0xFFFF in %3
|
||||
@ -36,6 +47,11 @@ define i32 @t2MOVTi16_test_2(i32 %a) {
|
||||
}
|
||||
|
||||
define i32 @t2MOVTi16_test_3(i32 %a) {
|
||||
; CHECK: t2MOVTi16_test_3:
|
||||
; CHECK: movs r1, #0
|
||||
; CHECK-NEXT: movt r1, #1234
|
||||
; CHECK: movw r1, #65535
|
||||
; CHECK-NEXT: movt r1, #1234
|
||||
%1 = shl i32 255, 8
|
||||
%2 = shl i32 1234, 8
|
||||
%3 = or i32 %1, 255 ; This give us 0xFFFF in %3
|
||||
@ -50,6 +66,11 @@ define i32 @t2MOVTi16_test_3(i32 %a) {
|
||||
}
|
||||
|
||||
define i32 @t2MOVTi16_test_nomatch_1(i32 %a) {
|
||||
; CHECK: t2MOVTi16_test_nomatch_1:
|
||||
; CHECK: movw r1, #16384
|
||||
; CHECK-NEXT: movt r1, #154
|
||||
; CHECK: movw r1, #65535
|
||||
; CHECK-NEXT: movt r1, #154
|
||||
%1 = shl i32 255, 8
|
||||
%2 = shl i32 1234, 8
|
||||
%3 = or i32 %1, 255 ; This give us 0xFFFF in %3
|
||||
@ -58,7 +79,6 @@ define i32 @t2MOVTi16_test_nomatch_1(i32 %a) {
|
||||
%6 = shl i32 %4, 2 ; This gives us (1234 << 16) in %6
|
||||
%7 = lshr i32 %6, 3
|
||||
%8 = or i32 %5, %7
|
||||
|
||||
ret i32 %8
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user