[SystemZ] Use upper words of GR64s for codegen

This just adds the basics necessary for allocating the upper words to
virtual registers (move, load and store).  The move support is parameterised
in a way that makes it easy to handle zero extensions, but the associated
zero-extend patterns are added by a later patch.

The easiest way of testing this seemed to be add a new "h" register
constraint for high words.  I don't expect the constraint to be useful
in real inline asms, but it should work, so I didn't try to hide it
behind an option.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191739 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Richard Sandiford 2013-10-01 11:26:28 +00:00
parent 7d0b89bedd
commit 55d7d83b6c
11 changed files with 236 additions and 13 deletions

View File

@ -264,6 +264,7 @@ public:
// Used by the TableGen code to check for particular operand types.
bool isGR32() const { return isReg(GR32Reg); }
bool isGRH32() const { return isReg(GRH32Reg); }
bool isGRX32() const { return false; }
bool isGR64() const { return isReg(GR64Reg); }
bool isGR128() const { return isReg(GR128Reg); }
bool isADDR32() const { return isReg(ADDR32Reg); }
@ -362,6 +363,10 @@ public:
return parseRegister(Operands, RegGR, SystemZMC::GRH32Regs, GRH32Reg);
}
OperandMatchResultTy
parseGRX32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
llvm_unreachable("GRX32 should only be used for pseudo instructions");
}
OperandMatchResultTy
parseGR64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return parseRegister(Operands, RegGR, SystemZMC::GR64Regs, GR64Reg);
}

View File

@ -82,6 +82,7 @@ unsigned SystemZMC::getFirstReg(unsigned Reg) {
if (!Initialized) {
for (unsigned I = 0; I < 16; ++I) {
Map[GR32Regs[I]] = I;
Map[GRH32Regs[I]] = I;
Map[GR64Regs[I]] = I;
Map[GR128Regs[I]] = I;
Map[FP32Regs[I]] = I;

View File

@ -35,6 +35,18 @@ static MCInst lowerRILow(const MachineInstr *MI, unsigned Opcode) {
.addImm(MI->getOperand(2).getImm());
}
// Return an RI instruction like MI with opcode Opcode, but with the
// R2 register turned into a GR64.
static MCInst lowerRIEfLow(const MachineInstr *MI, unsigned Opcode) {
return MCInstBuilder(Opcode)
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
.addReg(SystemZMC::getRegAsGR64(MI->getOperand(2).getReg()))
.addImm(MI->getOperand(3).getImm())
.addImm(MI->getOperand(4).getImm())
.addImm(MI->getOperand(5).getImm());
}
void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
SystemZMCInstLower Lower(Mang, MF->getContext(), *this);
MCInst LoweredMI;
@ -70,6 +82,16 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addImm(MI->getOperand(2).getImm());
break;
case SystemZ::RISBHH:
case SystemZ::RISBHL:
LoweredMI = lowerRIEfLow(MI, SystemZ::RISBHG);
break;
case SystemZ::RISBLH:
case SystemZ::RISBLL:
LoweredMI = lowerRIEfLow(MI, SystemZ::RISBLG);
break;
#define LOWER_LOW(NAME) \
case SystemZ::NAME##64: LoweredMI = lowerRILow(MI, SystemZ::NAME); break

View File

@ -51,7 +51,10 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
MVT PtrVT = getPointerTy();
// Set up the register classes.
addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
if (Subtarget.hasHighWord())
addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
else
addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
@ -338,6 +341,7 @@ SystemZTargetLowering::getConstraintType(const std::string &Constraint) const {
case 'a': // Address register
case 'd': // Data register (equivalent to 'r')
case 'f': // Floating-point register
case 'h': // High-part register
case 'r': // General-purpose register
return C_RegisterClass;
@ -380,6 +384,7 @@ getSingleConstraintMatchWeight(AsmOperandInfo &info,
case 'a': // Address register
case 'd': // Data register (equivalent to 'r')
case 'h': // High-part register
case 'r': // General-purpose register
if (CallOperandVal->getType()->isIntegerTy())
weight = CW_Register;
@ -460,6 +465,9 @@ getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const {
return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
case 'h': // High-part register (an LLVM extension)
return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
case 'f': // Floating-point register
if (VT == MVT::f64)
return std::make_pair(0U, &SystemZ::FP64BitRegClass);
@ -733,7 +741,7 @@ static bool canUseSiblingCall(CCState ArgCCInfo,
if (!VA.isRegLoc())
return false;
unsigned Reg = VA.getLocReg();
if (Reg == SystemZ::R6L || Reg == SystemZ::R6D)
if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
return false;
}
return true;

View File

@ -1349,6 +1349,40 @@ class Pseudo<dag outs, dag ins, list<dag> pattern>
let isCodeGenOnly = 1;
}
// Like UnaryRXY, but expanded after RA depending on the choice of registers.
class UnaryRXYPseudo<string key, SDPatternOperator operator,
RegisterOperand cls, bits<5> bytes,
AddressingMode mode = bdxaddr20only>
: Pseudo<(outs cls:$R1), (ins mode:$XBD2),
[(set cls:$R1, (operator mode:$XBD2))]> {
let OpKey = key ## cls;
let OpType = "mem";
let mayLoad = 1;
let Has20BitOffset = 1;
let HasIndex = 1;
let AccessBytes = bytes;
}
// Like UnaryRR, but expanded after RA depending on the choice of registers.
class UnaryRRPseudo<string key, SDPatternOperator operator,
RegisterOperand cls1, RegisterOperand cls2>
: Pseudo<(outs cls1:$R1), (ins cls2:$R2),
[(set cls1:$R1, (operator cls2:$R2))]> {
let OpKey = key ## cls1;
let OpType = "reg";
}
// Like StoreRXY, but expanded after RA depending on the choice of registers.
class StoreRXYPseudo<SDPatternOperator operator, RegisterOperand cls,
bits<5> bytes, AddressingMode mode = bdxaddr20only>
: Pseudo<(outs), (ins cls:$R1, mode:$XBD2),
[(operator cls:$R1, mode:$XBD2)]> {
let mayStore = 1;
let Has20BitOffset = 1;
let HasIndex = 1;
let AccessBytes = bytes;
}
// Implements "$dst = $cc & (8 >> CC) ? $src1 : $src2", where CC is
// the value of the PSW's 2-bit condition code field.
class SelectWrapper<RegisterOperand cls>
@ -1493,3 +1527,10 @@ class BinaryAliasRIL<SDPatternOperator operator, RegisterOperand cls,
[(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
let Constraints = "$R1 = $R1src";
}
// An alias of a RotateSelectRIEf, but with different register sizes.
class RotateSelectAliasRIEf<RegisterOperand cls1, RegisterOperand cls2>
: Alias<6, (outs cls1:$R1),
(ins cls1:$R1src, cls2:$R2, uimm8:$I3, uimm8:$I4, uimm8zx6:$I5), []> {
let Constraints = "$R1 = $R1src";
}

View File

@ -28,6 +28,15 @@ static uint64_t allOnes(unsigned int Count) {
return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1;
}
// Reg should be a 32-bit GPR. Return true if it is a high register rather
// than a low register.
static bool isHighReg(unsigned int Reg) {
if (SystemZ::GRH32BitRegClass.contains(Reg))
return true;
assert(SystemZ::GR32BitRegClass.contains(Reg) && "Invalid GRX32");
return false;
}
SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm)
: SystemZGenInstrInfo(SystemZ::ADJCALLSTACKDOWN, SystemZ::ADJCALLSTACKUP),
RI(tm), TM(tm) {
@ -82,6 +91,48 @@ void SystemZInstrInfo::splitAdjDynAlloc(MachineBasicBlock::iterator MI) const {
OffsetMO.setImm(Offset);
}
// MI is an RXY-style pseudo instruction. Replace it with LowOpcode
// if the first operand is a low GR32 and HighOpcode if the first operand
// is a high GR32.
void SystemZInstrInfo::expandRXYPseudo(MachineInstr *MI, unsigned LowOpcode,
unsigned HighOpcode) const {
unsigned Reg = MI->getOperand(0).getReg();
unsigned Opcode = getOpcodeForOffset(isHighReg(Reg) ? HighOpcode : LowOpcode,
MI->getOperand(2).getImm());
MI->setDesc(get(Opcode));
}
// Emit a zero-extending move from 32-bit GPR SrcReg to 32-bit GPR
// DestReg before MBBI in MBB. Use LowLowOpcode when both DestReg and SrcReg
// are low registers, otherwise use RISB[LH]G. Size is the number of bits
// taken from the low end of SrcReg (8 for LLCR, 16 for LLHR and 32 for LR).
// KillSrc is true if this move is the last use of SrcReg.
void SystemZInstrInfo::emitGRX32Move(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
DebugLoc DL, unsigned DestReg,
unsigned SrcReg, unsigned LowLowOpcode,
unsigned Size, bool KillSrc) const {
unsigned Opcode;
bool DestIsHigh = isHighReg(DestReg);
bool SrcIsHigh = isHighReg(SrcReg);
if (DestIsHigh && SrcIsHigh)
Opcode = SystemZ::RISBHH;
else if (DestIsHigh && !SrcIsHigh)
Opcode = SystemZ::RISBHL;
else if (!DestIsHigh && SrcIsHigh)
Opcode = SystemZ::RISBLH;
else {
BuildMI(MBB, MBBI, DL, get(LowLowOpcode), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
unsigned Rotate = (DestIsHigh != SrcIsHigh ? 32 : 0);
BuildMI(MBB, MBBI, DL, get(Opcode), DestReg)
.addReg(DestReg, RegState::Undef)
.addReg(SrcReg, getKillRegState(KillSrc))
.addImm(32 - Size).addImm(128 + 31).addImm(Rotate);
}
// If MI is a simple load or store for a frame object, return the register
// it loads or stores and set FrameIndex to the index of the frame object.
// Return 0 otherwise.
@ -460,11 +511,14 @@ SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
if (SystemZ::GRX32BitRegClass.contains(DestReg, SrcReg)) {
emitGRX32Move(MBB, MBBI, DL, DestReg, SrcReg, SystemZ::LR, 32, KillSrc);
return;
}
// Everything else needs only one instruction.
unsigned Opcode;
if (SystemZ::GR32BitRegClass.contains(DestReg, SrcReg))
Opcode = SystemZ::LR;
else if (SystemZ::GR64BitRegClass.contains(DestReg, SrcReg))
if (SystemZ::GR64BitRegClass.contains(DestReg, SrcReg))
Opcode = SystemZ::LGR;
else if (SystemZ::FP32BitRegClass.contains(DestReg, SrcReg))
Opcode = SystemZ::LER;
@ -601,7 +655,7 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
if (And.RegSize == 64)
NewOpcode = SystemZ::RISBG;
else if (TM.getSubtargetImpl()->hasHighWord())
NewOpcode = SystemZ::RISBLG32;
NewOpcode = SystemZ::RISBLL;
else
// We can't use RISBG for 32-bit operations because it clobbers the
// high word of the destination too.
@ -612,7 +666,7 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
Imm |= allOnes(And.RegSize) & ~(allOnes(And.ImmSize) << And.ImmLSB);
unsigned Start, End;
if (isRxSBGMask(Imm, And.RegSize, Start, End)) {
if (NewOpcode == SystemZ::RISBLG32) {
if (NewOpcode == SystemZ::RISBLL) {
Start &= 31;
End &= 31;
}
@ -752,6 +806,14 @@ SystemZInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
splitMove(MI, SystemZ::STD);
return true;
case SystemZ::LMux:
expandRXYPseudo(MI, SystemZ::L, SystemZ::LFH);
return true;
case SystemZ::STMux:
expandRXYPseudo(MI, SystemZ::ST, SystemZ::STFH);
return true;
case SystemZ::ADJDYNALLOC:
splitAdjDynAlloc(MI);
return true;
@ -824,6 +886,12 @@ void SystemZInstrInfo::getLoadStoreOpcodes(const TargetRegisterClass *RC,
if (RC == &SystemZ::GR32BitRegClass || RC == &SystemZ::ADDR32BitRegClass) {
LoadOpcode = SystemZ::L;
StoreOpcode = SystemZ::ST;
} else if (RC == &SystemZ::GRH32BitRegClass) {
LoadOpcode = SystemZ::LFH;
StoreOpcode = SystemZ::STFH;
} else if (RC == &SystemZ::GRX32BitRegClass) {
LoadOpcode = SystemZ::LMux;
StoreOpcode = SystemZ::STMux;
} else if (RC == &SystemZ::GR64BitRegClass ||
RC == &SystemZ::ADDR64BitRegClass) {
LoadOpcode = SystemZ::LG;

View File

@ -116,7 +116,12 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
void splitMove(MachineBasicBlock::iterator MI, unsigned NewOpcode) const;
void splitAdjDynAlloc(MachineBasicBlock::iterator MI) const;
void expandRXYPseudo(MachineInstr *MI, unsigned LowOpcode,
unsigned HighOpcode) const;
void emitGRX32Move(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
DebugLoc DL, unsigned DestReg, unsigned SrcReg,
unsigned LowLowOpcode, unsigned Size, bool KillSrc) const;
public:
explicit SystemZInstrInfo(SystemZTargetMachine &TM);

View File

@ -254,6 +254,9 @@ def BASR : InstRR<0x0D, (outs), (ins GR64:$R1, ADDR64:$R2),
// Register moves.
let neverHasSideEffects = 1 in {
// Expands to LR, RISBHG or RISBLG, depending on the choice of registers.
def LRMux : UnaryRRPseudo<"l", null_frag, GRX32, GRX32>,
Requires<[FeatureHighWord]>;
def LR : UnaryRR <"l", 0x18, null_frag, GR32, GR32>;
def LGR : UnaryRRE<"lg", 0xB904, null_frag, GR64, GR64>;
}
@ -293,6 +296,9 @@ let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1,
// Register loads.
let canFoldAsLoad = 1, SimpleBDXLoad = 1 in {
// Expands to L, LY or LFH, depending on the choice of register.
def LMux : UnaryRXYPseudo<"l", load, GRX32, 4>,
Requires<[FeatureHighWord]>;
defm L : UnaryRXPair<"l", 0x58, 0xE358, load, GR32, 4>;
def LFH : UnaryRXY<"lfh", 0xE3CA, load, GRH32, 4>,
Requires<[FeatureHighWord]>;
@ -327,6 +333,9 @@ let Uses = [CC] in {
// Register stores.
let SimpleBDXStore = 1 in {
// Expands to ST, STY or STFH, depending on the choice of register.
def STMux : StoreRXYPseudo<store, GRX32, 4>,
Requires<[FeatureHighWord]>;
defm ST : StoreRXPair<"st", 0x50, 0xE350, store, GR32, 4>;
def STFH : StoreRXY<"stfh", 0xE3CB, store, GRH32, 4>,
Requires<[FeatureHighWord]>;
@ -929,13 +938,14 @@ let Defs = [CC] in {
// Forms of RISBG that only affect one word of the destination register.
// They do not set CC.
let isCodeGenOnly = 1 in
def RISBLG32 : RotateSelectRIEf<"risblg", 0xEC51, GR32, GR32>,
Requires<[FeatureHighWord]>;
def RISBHG : RotateSelectRIEf<"risbhg", 0xEC5D, GRH32, GR64>,
Requires<[FeatureHighWord]>;
def RISBLL : RotateSelectAliasRIEf<GR32, GR32>, Requires<[FeatureHighWord]>;
def RISBLH : RotateSelectAliasRIEf<GR32, GRH32>, Requires<[FeatureHighWord]>;
def RISBHL : RotateSelectAliasRIEf<GRH32, GR32>, Requires<[FeatureHighWord]>;
def RISBHH : RotateSelectAliasRIEf<GRH32, GRH32>, Requires<[FeatureHighWord]>;
def RISBLG : RotateSelectRIEf<"risblg", 0xEC51, GR32, GR64>,
Requires<[FeatureHighWord]>;
def RISBHG : RotateSelectRIEf<"risbhg", 0xEC5D, GRH32, GR64>,
Requires<[FeatureHighWord]>;
// Rotate second operand left and perform a logical operation with selected
// bits of the first operand. The CC result only describes the selected bits,

View File

@ -43,12 +43,14 @@ SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// R11D is the frame pointer. Reserve all aliases.
Reserved.set(SystemZ::R11D);
Reserved.set(SystemZ::R11L);
Reserved.set(SystemZ::R11H);
Reserved.set(SystemZ::R10Q);
}
// R15D is the stack pointer. Reserve all aliases.
Reserved.set(SystemZ::R15D);
Reserved.set(SystemZ::R15L);
Reserved.set(SystemZ::R15H);
Reserved.set(SystemZ::R14Q);
return Reserved;
}

View File

@ -91,6 +91,15 @@ defm GRH32 : SystemZRegClass<"GRH32", i32, 32, (add (sequence "R%uH", 0, 5),
defm GR64 : SystemZRegClass<"GR64", i64, 64, (add (sequence "R%uD", 0, 5),
(sequence "R%uD", 15, 6))>;
// Combine the low and high GR32s into a single class. This can only be
// used for virtual registers if the high-word facility is available.
defm GRX32 : SystemZRegClass<"GRX32", i32, 32,
(add (sequence "R%uL", 0, 5),
(sequence "R%uH", 0, 5),
R15L, R15H, R14L, R14H, R13L, R13H,
R12L, R12H, R11L, R11H, R10L, R10H,
R9L, R9H, R8L, R8H, R7L, R7H, R6L, R6H)>;
// The architecture doesn't really have any i128 support, so model the
// register pairs as untyped instead.
defm GR128 : SystemZRegClass<"GR128", untyped, 128, (add R0Q, R2Q, R4Q,

View File

@ -0,0 +1,52 @@
; Test high-word operations, using "h" constraints to force a high
; register and "r" constraints to force a low register.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
; Test loads and stores involving mixtures of high and low registers.
define void @f1(i32 *%ptr1, i32 *%ptr2) {
; CHECK-LABEL: f1:
; CHECK-DAG: lfh [[REG1:%r[0-5]]], 0(%r2)
; CHECK-DAG: l [[REG2:%r[0-5]]], 0(%r3)
; CHECK-DAG: lfh [[REG3:%r[0-5]]], 4096(%r2)
; CHECK-DAG: ly [[REG4:%r[0-5]]], 524284(%r3)
; CHECK: blah [[REG1]], [[REG2]], [[REG3]], [[REG4]]
; CHECK-DAG: stfh [[REG1]], 0(%r2)
; CHECK-DAG: st [[REG2]], 0(%r3)
; CHECK-DAG: stfh [[REG3]], 4096(%r2)
; CHECK-DAG: sty [[REG4]], 524284(%r3)
; CHECK: br %r14
%ptr3 = getelementptr i32 *%ptr1, i64 1024
%ptr4 = getelementptr i32 *%ptr2, i64 131071
%old1 = load i32 *%ptr1
%old2 = load i32 *%ptr2
%old3 = load i32 *%ptr3
%old4 = load i32 *%ptr4
%res = call { i32, i32, i32, i32 } asm "blah $0, $1, $2, $3",
"=h,=r,=h,=r,0,1,2,3"(i32 %old1, i32 %old2, i32 %old3, i32 %old4)
%new1 = extractvalue { i32, i32, i32, i32 } %res, 0
%new2 = extractvalue { i32, i32, i32, i32 } %res, 1
%new3 = extractvalue { i32, i32, i32, i32 } %res, 2
%new4 = extractvalue { i32, i32, i32, i32 } %res, 3
store i32 %new1, i32 *%ptr1
store i32 %new2, i32 *%ptr2
store i32 %new3, i32 *%ptr3
store i32 %new4, i32 *%ptr4
ret void
}
; Test moves involving mixtures of high and low registers.
define i32 @f2(i32 %old) {
; CHECK-LABEL: f2:
; CHECK-DAG: risbhg [[REG1:%r[0-5]]], %r2, 0, 159, 32
; CHECK-DAG: lr %r3, %r2
; CHECK: stepa [[REG1]], %r2, %r3
; CHECK: risbhg {{%r[0-5]}}, [[REG1]], 0, 159, 0
; CHECK: stepb [[REG2:%r[0-5]]]
; CHECK: risblg %r2, [[REG2]], 0, 159, 32
; CHECK: br %r14
%tmp = call i32 asm "stepa $1, $2, $3",
"=h,0,{r2},{r3}"(i32 %old, i32 %old, i32 %old)
%new = call i32 asm "stepb $1, $2", "=&h,0,h"(i32 %tmp, i32 %tmp)
ret i32 %new
}