diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index a79ad458f6c..763f40c7ff4 100644 --- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -264,6 +264,7 @@ public: // Used by the TableGen code to check for particular operand types. bool isGR32() const { return isReg(GR32Reg); } bool isGRH32() const { return isReg(GRH32Reg); } + bool isGRX32() const { return false; } bool isGR64() const { return isReg(GR64Reg); } bool isGR128() const { return isReg(GR128Reg); } bool isADDR32() const { return isReg(ADDR32Reg); } @@ -362,6 +363,10 @@ public: return parseRegister(Operands, RegGR, SystemZMC::GRH32Regs, GRH32Reg); } OperandMatchResultTy + parseGRX32(SmallVectorImpl &Operands) { + llvm_unreachable("GRX32 should only be used for pseudo instructions"); + } + OperandMatchResultTy parseGR64(SmallVectorImpl &Operands) { return parseRegister(Operands, RegGR, SystemZMC::GR64Regs, GR64Reg); } diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp index 9a4a290a550..19b2bcb921e 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -82,6 +82,7 @@ unsigned SystemZMC::getFirstReg(unsigned Reg) { if (!Initialized) { for (unsigned I = 0; I < 16; ++I) { Map[GR32Regs[I]] = I; + Map[GRH32Regs[I]] = I; Map[GR64Regs[I]] = I; Map[GR128Regs[I]] = I; Map[FP32Regs[I]] = I; diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp index 380fa87eda5..9ad5c8bdc66 100644 --- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -35,6 +35,18 @@ static MCInst lowerRILow(const MachineInstr *MI, unsigned Opcode) { .addImm(MI->getOperand(2).getImm()); } +// Return an RI instruction like MI with opcode Opcode, but with the +// R2 register turned into a GR64. +static MCInst lowerRIEfLow(const MachineInstr *MI, unsigned Opcode) { + return MCInstBuilder(Opcode) + .addReg(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addReg(SystemZMC::getRegAsGR64(MI->getOperand(2).getReg())) + .addImm(MI->getOperand(3).getImm()) + .addImm(MI->getOperand(4).getImm()) + .addImm(MI->getOperand(5).getImm()); +} + void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { SystemZMCInstLower Lower(Mang, MF->getContext(), *this); MCInst LoweredMI; @@ -70,6 +82,16 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addImm(MI->getOperand(2).getImm()); break; + case SystemZ::RISBHH: + case SystemZ::RISBHL: + LoweredMI = lowerRIEfLow(MI, SystemZ::RISBHG); + break; + + case SystemZ::RISBLH: + case SystemZ::RISBLL: + LoweredMI = lowerRIEfLow(MI, SystemZ::RISBLG); + break; + #define LOWER_LOW(NAME) \ case SystemZ::NAME##64: LoweredMI = lowerRILow(MI, SystemZ::NAME); break diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 21653e85e1c..cb0f445cf74 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -51,7 +51,10 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) MVT PtrVT = getPointerTy(); // Set up the register classes. - addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass); + if (Subtarget.hasHighWord()) + addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass); + else + addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass); addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass); addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass); addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass); @@ -338,6 +341,7 @@ SystemZTargetLowering::getConstraintType(const std::string &Constraint) const { case 'a': // Address register case 'd': // Data register (equivalent to 'r') case 'f': // Floating-point register + case 'h': // High-part register case 'r': // General-purpose register return C_RegisterClass; @@ -380,6 +384,7 @@ getSingleConstraintMatchWeight(AsmOperandInfo &info, case 'a': // Address register case 'd': // Data register (equivalent to 'r') + case 'h': // High-part register case 'r': // General-purpose register if (CallOperandVal->getType()->isIntegerTy()) weight = CW_Register; @@ -460,6 +465,9 @@ getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const { return std::make_pair(0U, &SystemZ::ADDR128BitRegClass); return std::make_pair(0U, &SystemZ::ADDR32BitRegClass); + case 'h': // High-part register (an LLVM extension) + return std::make_pair(0U, &SystemZ::GRH32BitRegClass); + case 'f': // Floating-point register if (VT == MVT::f64) return std::make_pair(0U, &SystemZ::FP64BitRegClass); @@ -733,7 +741,7 @@ static bool canUseSiblingCall(CCState ArgCCInfo, if (!VA.isRegLoc()) return false; unsigned Reg = VA.getLocReg(); - if (Reg == SystemZ::R6L || Reg == SystemZ::R6D) + if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D) return false; } return true; diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index a104329a317..97a1578f174 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -1349,6 +1349,40 @@ class Pseudo pattern> let isCodeGenOnly = 1; } +// Like UnaryRXY, but expanded after RA depending on the choice of registers. +class UnaryRXYPseudo bytes, + AddressingMode mode = bdxaddr20only> + : Pseudo<(outs cls:$R1), (ins mode:$XBD2), + [(set cls:$R1, (operator mode:$XBD2))]> { + let OpKey = key ## cls; + let OpType = "mem"; + let mayLoad = 1; + let Has20BitOffset = 1; + let HasIndex = 1; + let AccessBytes = bytes; +} + +// Like UnaryRR, but expanded after RA depending on the choice of registers. +class UnaryRRPseudo + : Pseudo<(outs cls1:$R1), (ins cls2:$R2), + [(set cls1:$R1, (operator cls2:$R2))]> { + let OpKey = key ## cls1; + let OpType = "reg"; +} + +// Like StoreRXY, but expanded after RA depending on the choice of registers. +class StoreRXYPseudo bytes, AddressingMode mode = bdxaddr20only> + : Pseudo<(outs), (ins cls:$R1, mode:$XBD2), + [(operator cls:$R1, mode:$XBD2)]> { + let mayStore = 1; + let Has20BitOffset = 1; + let HasIndex = 1; + let AccessBytes = bytes; +} + // Implements "$dst = $cc & (8 >> CC) ? $src1 : $src2", where CC is // the value of the PSW's 2-bit condition code field. class SelectWrapper @@ -1493,3 +1527,10 @@ class BinaryAliasRIL { let Constraints = "$R1 = $R1src"; } + +// An alias of a RotateSelectRIEf, but with different register sizes. +class RotateSelectAliasRIEf + : Alias<6, (outs cls1:$R1), + (ins cls1:$R1src, cls2:$R2, uimm8:$I3, uimm8:$I4, uimm8zx6:$I5), []> { + let Constraints = "$R1 = $R1src"; +} diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index ba182bf76c8..798fa3349f9 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -28,6 +28,15 @@ static uint64_t allOnes(unsigned int Count) { return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1; } +// Reg should be a 32-bit GPR. Return true if it is a high register rather +// than a low register. +static bool isHighReg(unsigned int Reg) { + if (SystemZ::GRH32BitRegClass.contains(Reg)) + return true; + assert(SystemZ::GR32BitRegClass.contains(Reg) && "Invalid GRX32"); + return false; +} + SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm) : SystemZGenInstrInfo(SystemZ::ADJCALLSTACKDOWN, SystemZ::ADJCALLSTACKUP), RI(tm), TM(tm) { @@ -82,6 +91,48 @@ void SystemZInstrInfo::splitAdjDynAlloc(MachineBasicBlock::iterator MI) const { OffsetMO.setImm(Offset); } +// MI is an RXY-style pseudo instruction. Replace it with LowOpcode +// if the first operand is a low GR32 and HighOpcode if the first operand +// is a high GR32. +void SystemZInstrInfo::expandRXYPseudo(MachineInstr *MI, unsigned LowOpcode, + unsigned HighOpcode) const { + unsigned Reg = MI->getOperand(0).getReg(); + unsigned Opcode = getOpcodeForOffset(isHighReg(Reg) ? HighOpcode : LowOpcode, + MI->getOperand(2).getImm()); + MI->setDesc(get(Opcode)); +} + +// Emit a zero-extending move from 32-bit GPR SrcReg to 32-bit GPR +// DestReg before MBBI in MBB. Use LowLowOpcode when both DestReg and SrcReg +// are low registers, otherwise use RISB[LH]G. Size is the number of bits +// taken from the low end of SrcReg (8 for LLCR, 16 for LLHR and 32 for LR). +// KillSrc is true if this move is the last use of SrcReg. +void SystemZInstrInfo::emitGRX32Move(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL, unsigned DestReg, + unsigned SrcReg, unsigned LowLowOpcode, + unsigned Size, bool KillSrc) const { + unsigned Opcode; + bool DestIsHigh = isHighReg(DestReg); + bool SrcIsHigh = isHighReg(SrcReg); + if (DestIsHigh && SrcIsHigh) + Opcode = SystemZ::RISBHH; + else if (DestIsHigh && !SrcIsHigh) + Opcode = SystemZ::RISBHL; + else if (!DestIsHigh && SrcIsHigh) + Opcode = SystemZ::RISBLH; + else { + BuildMI(MBB, MBBI, DL, get(LowLowOpcode), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + unsigned Rotate = (DestIsHigh != SrcIsHigh ? 32 : 0); + BuildMI(MBB, MBBI, DL, get(Opcode), DestReg) + .addReg(DestReg, RegState::Undef) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addImm(32 - Size).addImm(128 + 31).addImm(Rotate); +} + // If MI is a simple load or store for a frame object, return the register // it loads or stores and set FrameIndex to the index of the frame object. // Return 0 otherwise. @@ -460,11 +511,14 @@ SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } + if (SystemZ::GRX32BitRegClass.contains(DestReg, SrcReg)) { + emitGRX32Move(MBB, MBBI, DL, DestReg, SrcReg, SystemZ::LR, 32, KillSrc); + return; + } + // Everything else needs only one instruction. unsigned Opcode; - if (SystemZ::GR32BitRegClass.contains(DestReg, SrcReg)) - Opcode = SystemZ::LR; - else if (SystemZ::GR64BitRegClass.contains(DestReg, SrcReg)) + if (SystemZ::GR64BitRegClass.contains(DestReg, SrcReg)) Opcode = SystemZ::LGR; else if (SystemZ::FP32BitRegClass.contains(DestReg, SrcReg)) Opcode = SystemZ::LER; @@ -601,7 +655,7 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (And.RegSize == 64) NewOpcode = SystemZ::RISBG; else if (TM.getSubtargetImpl()->hasHighWord()) - NewOpcode = SystemZ::RISBLG32; + NewOpcode = SystemZ::RISBLL; else // We can't use RISBG for 32-bit operations because it clobbers the // high word of the destination too. @@ -612,7 +666,7 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, Imm |= allOnes(And.RegSize) & ~(allOnes(And.ImmSize) << And.ImmLSB); unsigned Start, End; if (isRxSBGMask(Imm, And.RegSize, Start, End)) { - if (NewOpcode == SystemZ::RISBLG32) { + if (NewOpcode == SystemZ::RISBLL) { Start &= 31; End &= 31; } @@ -752,6 +806,14 @@ SystemZInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { splitMove(MI, SystemZ::STD); return true; + case SystemZ::LMux: + expandRXYPseudo(MI, SystemZ::L, SystemZ::LFH); + return true; + + case SystemZ::STMux: + expandRXYPseudo(MI, SystemZ::ST, SystemZ::STFH); + return true; + case SystemZ::ADJDYNALLOC: splitAdjDynAlloc(MI); return true; @@ -824,6 +886,12 @@ void SystemZInstrInfo::getLoadStoreOpcodes(const TargetRegisterClass *RC, if (RC == &SystemZ::GR32BitRegClass || RC == &SystemZ::ADDR32BitRegClass) { LoadOpcode = SystemZ::L; StoreOpcode = SystemZ::ST; + } else if (RC == &SystemZ::GRH32BitRegClass) { + LoadOpcode = SystemZ::LFH; + StoreOpcode = SystemZ::STFH; + } else if (RC == &SystemZ::GRX32BitRegClass) { + LoadOpcode = SystemZ::LMux; + StoreOpcode = SystemZ::STMux; } else if (RC == &SystemZ::GR64BitRegClass || RC == &SystemZ::ADDR64BitRegClass) { LoadOpcode = SystemZ::LG; diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index 40fd1b622e6..6e4ab1b4eba 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -116,7 +116,12 @@ class SystemZInstrInfo : public SystemZGenInstrInfo { void splitMove(MachineBasicBlock::iterator MI, unsigned NewOpcode) const; void splitAdjDynAlloc(MachineBasicBlock::iterator MI) const; - + void expandRXYPseudo(MachineInstr *MI, unsigned LowOpcode, + unsigned HighOpcode) const; + void emitGRX32Move(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + DebugLoc DL, unsigned DestReg, unsigned SrcReg, + unsigned LowLowOpcode, unsigned Size, bool KillSrc) const; + public: explicit SystemZInstrInfo(SystemZTargetMachine &TM); diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index b93e863a957..241cd3336a9 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -254,6 +254,9 @@ def BASR : InstRR<0x0D, (outs), (ins GR64:$R1, ADDR64:$R2), // Register moves. let neverHasSideEffects = 1 in { + // Expands to LR, RISBHG or RISBLG, depending on the choice of registers. + def LRMux : UnaryRRPseudo<"l", null_frag, GRX32, GRX32>, + Requires<[FeatureHighWord]>; def LR : UnaryRR <"l", 0x18, null_frag, GR32, GR32>; def LGR : UnaryRRE<"lg", 0xB904, null_frag, GR64, GR64>; } @@ -293,6 +296,9 @@ let neverHasSideEffects = 1, isAsCheapAsAMove = 1, isMoveImm = 1, // Register loads. let canFoldAsLoad = 1, SimpleBDXLoad = 1 in { + // Expands to L, LY or LFH, depending on the choice of register. + def LMux : UnaryRXYPseudo<"l", load, GRX32, 4>, + Requires<[FeatureHighWord]>; defm L : UnaryRXPair<"l", 0x58, 0xE358, load, GR32, 4>; def LFH : UnaryRXY<"lfh", 0xE3CA, load, GRH32, 4>, Requires<[FeatureHighWord]>; @@ -327,6 +333,9 @@ let Uses = [CC] in { // Register stores. let SimpleBDXStore = 1 in { + // Expands to ST, STY or STFH, depending on the choice of register. + def STMux : StoreRXYPseudo, + Requires<[FeatureHighWord]>; defm ST : StoreRXPair<"st", 0x50, 0xE350, store, GR32, 4>; def STFH : StoreRXY<"stfh", 0xE3CB, store, GRH32, 4>, Requires<[FeatureHighWord]>; @@ -929,13 +938,14 @@ let Defs = [CC] in { // Forms of RISBG that only affect one word of the destination register. // They do not set CC. -let isCodeGenOnly = 1 in - def RISBLG32 : RotateSelectRIEf<"risblg", 0xEC51, GR32, GR32>, - Requires<[FeatureHighWord]>; -def RISBHG : RotateSelectRIEf<"risbhg", 0xEC5D, GRH32, GR64>, - Requires<[FeatureHighWord]>; +def RISBLL : RotateSelectAliasRIEf, Requires<[FeatureHighWord]>; +def RISBLH : RotateSelectAliasRIEf, Requires<[FeatureHighWord]>; +def RISBHL : RotateSelectAliasRIEf, Requires<[FeatureHighWord]>; +def RISBHH : RotateSelectAliasRIEf, Requires<[FeatureHighWord]>; def RISBLG : RotateSelectRIEf<"risblg", 0xEC51, GR32, GR64>, Requires<[FeatureHighWord]>; +def RISBHG : RotateSelectRIEf<"risbhg", 0xEC5D, GRH32, GR64>, + Requires<[FeatureHighWord]>; // Rotate second operand left and perform a logical operation with selected // bits of the first operand. The CC result only describes the selected bits, diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 841f0ae0269..b61ae88f733 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -43,12 +43,14 @@ SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const { // R11D is the frame pointer. Reserve all aliases. Reserved.set(SystemZ::R11D); Reserved.set(SystemZ::R11L); + Reserved.set(SystemZ::R11H); Reserved.set(SystemZ::R10Q); } // R15D is the stack pointer. Reserve all aliases. Reserved.set(SystemZ::R15D); Reserved.set(SystemZ::R15L); + Reserved.set(SystemZ::R15H); Reserved.set(SystemZ::R14Q); return Reserved; } diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td index 6d83714bffa..93d7c8375b3 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.td +++ b/lib/Target/SystemZ/SystemZRegisterInfo.td @@ -91,6 +91,15 @@ defm GRH32 : SystemZRegClass<"GRH32", i32, 32, (add (sequence "R%uH", 0, 5), defm GR64 : SystemZRegClass<"GR64", i64, 64, (add (sequence "R%uD", 0, 5), (sequence "R%uD", 15, 6))>; +// Combine the low and high GR32s into a single class. This can only be +// used for virtual registers if the high-word facility is available. +defm GRX32 : SystemZRegClass<"GRX32", i32, 32, + (add (sequence "R%uL", 0, 5), + (sequence "R%uH", 0, 5), + R15L, R15H, R14L, R14H, R13L, R13H, + R12L, R12H, R11L, R11H, R10L, R10H, + R9L, R9H, R8L, R8H, R7L, R7H, R6L, R6H)>; + // The architecture doesn't really have any i128 support, so model the // register pairs as untyped instead. defm GR128 : SystemZRegClass<"GR128", untyped, 128, (add R0Q, R2Q, R4Q, diff --git a/test/CodeGen/SystemZ/asm-18.ll b/test/CodeGen/SystemZ/asm-18.ll new file mode 100644 index 00000000000..b9c96c07adc --- /dev/null +++ b/test/CodeGen/SystemZ/asm-18.ll @@ -0,0 +1,52 @@ +; Test high-word operations, using "h" constraints to force a high +; register and "r" constraints to force a low register. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Test loads and stores involving mixtures of high and low registers. +define void @f1(i32 *%ptr1, i32 *%ptr2) { +; CHECK-LABEL: f1: +; CHECK-DAG: lfh [[REG1:%r[0-5]]], 0(%r2) +; CHECK-DAG: l [[REG2:%r[0-5]]], 0(%r3) +; CHECK-DAG: lfh [[REG3:%r[0-5]]], 4096(%r2) +; CHECK-DAG: ly [[REG4:%r[0-5]]], 524284(%r3) +; CHECK: blah [[REG1]], [[REG2]], [[REG3]], [[REG4]] +; CHECK-DAG: stfh [[REG1]], 0(%r2) +; CHECK-DAG: st [[REG2]], 0(%r3) +; CHECK-DAG: stfh [[REG3]], 4096(%r2) +; CHECK-DAG: sty [[REG4]], 524284(%r3) +; CHECK: br %r14 + %ptr3 = getelementptr i32 *%ptr1, i64 1024 + %ptr4 = getelementptr i32 *%ptr2, i64 131071 + %old1 = load i32 *%ptr1 + %old2 = load i32 *%ptr2 + %old3 = load i32 *%ptr3 + %old4 = load i32 *%ptr4 + %res = call { i32, i32, i32, i32 } asm "blah $0, $1, $2, $3", + "=h,=r,=h,=r,0,1,2,3"(i32 %old1, i32 %old2, i32 %old3, i32 %old4) + %new1 = extractvalue { i32, i32, i32, i32 } %res, 0 + %new2 = extractvalue { i32, i32, i32, i32 } %res, 1 + %new3 = extractvalue { i32, i32, i32, i32 } %res, 2 + %new4 = extractvalue { i32, i32, i32, i32 } %res, 3 + store i32 %new1, i32 *%ptr1 + store i32 %new2, i32 *%ptr2 + store i32 %new3, i32 *%ptr3 + store i32 %new4, i32 *%ptr4 + ret void +} + +; Test moves involving mixtures of high and low registers. +define i32 @f2(i32 %old) { +; CHECK-LABEL: f2: +; CHECK-DAG: risbhg [[REG1:%r[0-5]]], %r2, 0, 159, 32 +; CHECK-DAG: lr %r3, %r2 +; CHECK: stepa [[REG1]], %r2, %r3 +; CHECK: risbhg {{%r[0-5]}}, [[REG1]], 0, 159, 0 +; CHECK: stepb [[REG2:%r[0-5]]] +; CHECK: risblg %r2, [[REG2]], 0, 159, 32 +; CHECK: br %r14 + %tmp = call i32 asm "stepa $1, $2, $3", + "=h,0,{r2},{r3}"(i32 %old, i32 %old, i32 %old) + %new = call i32 asm "stepb $1, $2", "=&h,0,h"(i32 %tmp, i32 %tmp) + ret i32 %new +}