From ece8b73eb28426b7cec82b1a91e83155a8343ad0 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Fri, 13 Jan 2012 22:55:42 +0000 Subject: [PATCH] Use RegisterTuples to generate pseudo-registers. The QQ and QQQQ registers are not 'real', they are pseudo-registers used to model some vld and vst instructions. This makes the call clobber lists longer, but I intend to get rid of those soon. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@148151 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrInfo.td | 21 ++++++++---- lib/Target/ARM/ARMInstrThumb.td | 14 +++++--- lib/Target/ARM/ARMInstrThumb2.td | 5 +-- lib/Target/ARM/ARMRegisterInfo.td | 56 +++++++++++++------------------ 4 files changed, 51 insertions(+), 45 deletions(-) diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 7e1a8ddf251..dabd187069f 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -1900,7 +1900,9 @@ let isCall = 1, // FIXME: Do we really need a non-predicated version? If so, it should // at least be a pseudo instruction expanding to the predicated version // at MC lowering time. - Defs = [R0, R1, R2, R3, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR], + Defs = [R0, R1, R2, R3, R12, LR, + Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, + CPSR, FPSCR], Uses = [SP] in { def BL : ABXI<0b1011, (outs), (ins bl_target:$func, variable_ops), IIC_Br, "bl\t$func", @@ -1956,7 +1958,9 @@ let isCall = 1, // On IOS R9 is call-clobbered. // R7 is marked as a use to prevent frame-pointer assignments from being // moved above / below calls. - Defs = [R0, R1, R2, R3, R9, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR], + Defs = [R0, R1, R2, R3, R9, R12, LR, + Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, + CPSR, FPSCR], Uses = [R7, SP] in { def BLr9 : ARMPseudoExpand<(outs), (ins bl_target:$func, variable_ops), 4, IIC_Br, @@ -2061,7 +2065,8 @@ def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func", let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { // IOS versions. - let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC], + let Defs = [R0, R1, R2, R3, R9, R12, + Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, PC], Uses = [SP] in { def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops), IIC_Br, []>, Requires<[IsIOS]>; @@ -2082,7 +2087,8 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { } // Non-IOS versions (the difference is R9). - let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC], + let Defs = [R0, R1, R2, R3, R12, + Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, PC], Uses = [SP] in { def TCRETURNdiND : PseudoInst<(outs), (ins i32imm:$dst, variable_ops), IIC_Br, []>, Requires<[IsNotIOS]>; @@ -4711,8 +4717,8 @@ let isCall = 1, // no encoding information is necessary. let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR, - QQQQ0, QQQQ1, QQQQ2, QQQQ3 ], hasSideEffects = 1, isBarrier = 1, - usesCustomInserter = 1 in { + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15 ], + hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { def Int_eh_sjlj_setjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$val), NoItinerary, [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, @@ -4743,7 +4749,8 @@ def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch), // that need the instruction size). let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR, - QQQQ0, QQQQ1, QQQQ2, QQQQ3 ], isBarrier = 1 in + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15 ], + isBarrier = 1 in def Int_eh_sjlj_dispatchsetup : PseudoInst<(outs), (ins), NoItinerary, []>; let Defs = diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 8238ff065f2..d55b08dc53e 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -405,7 +405,9 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in { // potentially appearing dead. let isCall = 1, // On non-IOS platforms R9 is callee-saved. - Defs = [R0, R1, R2, R3, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR], + Defs = [R0, R1, R2, R3, R12, LR, + Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, + CPSR, FPSCR], Uses = [SP] in { // Also used for Thumb2 def tBL : TIx2<0b11110, 0b11, 1, @@ -457,7 +459,9 @@ let isCall = 1, // On IOS R9 is call-clobbered. // R7 is marked as a use to prevent frame-pointer assignments from being // moved above / below calls. - Defs = [R0, R1, R2, R3, R9, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR], + Defs = [R0, R1, R2, R3, R9, R12, LR, + Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, + CPSR, FPSCR], Uses = [R7, SP] in { // Also used for Thumb2 def tBLr9 : tPseudoExpand<(outs), (ins pred:$p, t_bltarget:$func, variable_ops), @@ -524,7 +528,8 @@ let isBranch = 1, isTerminator = 1 in // Tail calls let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { // IOS versions. - let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC], + let Defs = [R0, R1, R2, R3, R9, R12, + Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, PC], Uses = [SP] in { // tTAILJMPd: IOS version uses a Thumb2 branch (no Thumb1 tail calls // on IOS), so it's in ARMInstrThumb2.td. @@ -534,7 +539,8 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { Requires<[IsThumb, IsIOS]>; } // Non-IOS versions (the difference is R9). - let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC], + let Defs = [R0, R1, R2, R3, R12, + Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, PC], Uses = [SP] in { def tTAILJMPdND : tPseudoExpand<(outs), (ins t_brtarget:$dst, pred:$p, variable_ops), diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 10422ee111d..8cc0cdbb251 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -3096,7 +3096,7 @@ def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", []>, // $val is a scratch register for our use. let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR, - QQQQ0, QQQQ1, QQQQ2, QQQQ3 ], + Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15], hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in { def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val), @@ -3216,7 +3216,8 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br, // it goes here. let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { // IOS version. - let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC], + let Defs = [R0, R1, R2, R3, R9, R12, PC, + Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15], Uses = [SP] in def tTAILJMPd: tPseudoExpand<(outs), (ins uncondbrtarget:$dst, pred:$p, variable_ops), diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 2035b658280..f341d516499 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -150,36 +150,6 @@ def Q14 : ARMReg<14, "q14", [D28, D29]>; def Q15 : ARMReg<15, "q15", [D30, D31]>; } -// Pseudo 256-bit registers to represent pairs of Q registers. These should -// never be present in the emitted code. -// These are used for NEON load / store instructions, e.g., vld4, vst3. -// NOTE: It's possible to define more QQ registers since technically the -// starting D register number doesn't have to be multiple of 4, e.g., -// D1, D2, D3, D4 would be a legal quad, but that would make the subregister -// stuff very messy. -let SubRegIndices = [qsub_0, qsub_1], - CompositeIndices = [(dsub_2 qsub_1, dsub_0), (dsub_3 qsub_1, dsub_1)] in { -def QQ0 : ARMReg<0, "qq0", [Q0, Q1]>; -def QQ1 : ARMReg<1, "qq1", [Q2, Q3]>; -def QQ2 : ARMReg<2, "qq2", [Q4, Q5]>; -def QQ3 : ARMReg<3, "qq3", [Q6, Q7]>; -def QQ4 : ARMReg<4, "qq4", [Q8, Q9]>; -def QQ5 : ARMReg<5, "qq5", [Q10, Q11]>; -def QQ6 : ARMReg<6, "qq6", [Q12, Q13]>; -def QQ7 : ARMReg<7, "qq7", [Q14, Q15]>; -} - -// Pseudo 512-bit registers to represent four consecutive Q registers. -let SubRegIndices = [qqsub_0, qqsub_1], - CompositeIndices = [(qsub_2 qqsub_1, qsub_0), (qsub_3 qqsub_1, qsub_1), - (dsub_4 qqsub_1, dsub_0), (dsub_5 qqsub_1, dsub_1), - (dsub_6 qqsub_1, dsub_2), (dsub_7 qqsub_1, dsub_3)] in { -def QQQQ0 : ARMReg<0, "qqqq0", [QQ0, QQ1]>; -def QQQQ1 : ARMReg<1, "qqqq1", [QQ2, QQ3]>; -def QQQQ2 : ARMReg<2, "qqqq2", [QQ4, QQ5]>; -def QQQQ3 : ARMReg<3, "qqqq3", [QQ6, QQ7]>; -} - // Current Program Status Register. def CPSR : ARMReg<0, "cpsr">; def APSR : ARMReg<1, "apsr">; @@ -316,9 +286,22 @@ def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], (DPR_8 dsub_0, dsub_1)]; } +// Pseudo 256-bit registers to represent pairs of Q registers. These should +// never be present in the emitted code. +// These are used for NEON load / store instructions, e.g., vld4, vst3. +// NOTE: It's possible to define more QQ registers since technically the +// starting D register number doesn't have to be multiple of 4, e.g., +// D1, D2, D3, D4 would be a legal quad, but that would make the subregister +// stuff very messy. +def Tuples2Q : RegisterTuples<[qsub_0, qsub_1], + [(decimate QPR, 2), + (decimate (shl QPR, 1), 2)]> { + let CompositeIndices = [(dsub_2 qsub_1, dsub_0), (dsub_3 qsub_1, dsub_1)]; +} + // Pseudo 256-bit vector register class to model pairs of Q registers // (4 consecutive D registers). -def QQPR : RegisterClass<"ARM", [v4i64], 256, (sequence "QQ%u", 0, 7)> { +def QQPR : RegisterClass<"ARM", [v4i64], 256, (add Tuples2Q)> { let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3), (QPR qsub_0, qsub_1)]; // Allocate non-VFP2 aliases first. @@ -326,9 +309,18 @@ def QQPR : RegisterClass<"ARM", [v4i64], 256, (sequence "QQ%u", 0, 7)> { let AltOrderSelect = [{ return 1; }]; } +// Pseudo 512-bit registers to represent four consecutive Q registers. +def Tuples2QQ : RegisterTuples<[qqsub_0, qqsub_1], + [(decimate QQPR, 2), + (decimate (shl QQPR, 1), 2)]> { + let CompositeIndices = [(qsub_2 qqsub_1, qsub_0), (qsub_3 qqsub_1, qsub_1), + (dsub_4 qqsub_1, dsub_0), (dsub_5 qqsub_1, dsub_1), + (dsub_6 qqsub_1, dsub_2), (dsub_7 qqsub_1, dsub_3)]; +} + // Pseudo 512-bit vector register class to model 4 consecutive Q registers // (8 consecutive D registers). -def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (sequence "QQQQ%u", 0, 3)> { +def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (add Tuples2QQ)> { let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3, dsub_4, dsub_5, dsub_6, dsub_7), (QPR qsub_0, qsub_1, qsub_2, qsub_3)];