From d8f4348cab0f6ac58d62746aceb822b7d615807e Mon Sep 17 00:00:00 2001
From: Lang Hames
Date: Thu, 23 Jan 2014 20:23:36 +0000
Subject: [PATCH] Replace vfmaddxx213 instructions with their 231-type
 equivalents in accumulator loops.

Writing back to the accumulator (231-type) allows the coalescer to eliminate
an extra copy.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199933 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 123 +++++++++++++++++++++++++++++
 lib/Target/X86/X86ISelLowering.h   |   3 +
 lib/Target/X86/X86InstrFMA.td      |   6 +-
 test/CodeGen/X86/fma.ll            |  15 +++++
 4 files changed, 144 insertions(+), 3 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index ede17c1750a..3e641cdba91 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -15963,6 +15963,103 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
   return MBB;
 }
 
+// Replace 213-type (isel default) FMA3 instructions with 231-type for
+// accumulator loops. Writing back to the accumulator allows the coalescer
+// to remove extra copies in the loop.
+//
+// MI is the 213-form FMA3 instruction to examine and MBB is the block the
+// replacement is inserted into. MI is rewritten only when its addend
+// (operand 3) is defined by a PHI that has MI's own result as an input;
+// otherwise it is left untouched. MBB is returned in every case.
+MachineBasicBlock *
+X86TargetLowering::emitFMA3Instr(MachineInstr *MI,
+                                 MachineBasicBlock *MBB) const {
+  MachineOperand &AddendOp = MI->getOperand(3);
+
+  // Bail out early if the addend isn't a register - we can't switch these.
+  if (!AddendOp.isReg())
+    return MBB;
+
+  MachineFunction &MF = *MBB->getParent();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  // Check whether the addend is defined by a PHI:
+  assert(MRI.hasOneDef(AddendOp.getReg()) && "Multiple defs in SSA?");
+  MachineInstr &AddendDef = *MRI.def_begin(AddendOp.getReg());
+  if (!AddendDef.isPHI())
+    return MBB;
+
+  // Look for the following pattern:
+  // loop:
+  //   %addend = phi [%entry, 0], [%loop, %result]
+  //   ...
+  //   %result = FMA213 %m2, %m1, %addend
+
+  // Replace with:
+  //   loop:
+  //   %addend = phi [%entry, 0], [%loop, %result]
+  //   ...
+  //   %result = FMA231 %addend, %m1, %m2
+
+  // Walk the PHI's operands two at a time starting from operand 1, looking
+  // for an input that is produced by MI itself.
+  for (unsigned i = 1, e = AddendDef.getNumOperands(); i < e; i += 2) {
+    assert(AddendDef.getOperand(i).isReg());
+    const MachineOperand &PHISrcOp = AddendDef.getOperand(i);
+    MachineInstr &PHISrcInst = *MRI.def_begin(PHISrcOp.getReg());
+    if (&PHISrcInst == MI) {
+      // Found a matching instruction.
+      unsigned NewFMAOpc = 0;
+      switch (MI->getOpcode()) {
+      case X86::VFMADDPDr213r: NewFMAOpc = X86::VFMADDPDr231r; break;
+      case X86::VFMADDPSr213r: NewFMAOpc = X86::VFMADDPSr231r; break;
+      case X86::VFMADDSDr213r: NewFMAOpc = X86::VFMADDSDr231r; break;
+      case X86::VFMADDSSr213r: NewFMAOpc = X86::VFMADDSSr231r; break;
+      case X86::VFMSUBPDr213r: NewFMAOpc = X86::VFMSUBPDr231r; break;
+      case X86::VFMSUBPSr213r: NewFMAOpc = X86::VFMSUBPSr231r; break;
+      case X86::VFMSUBSDr213r: NewFMAOpc = X86::VFMSUBSDr231r; break;
+      case X86::VFMSUBSSr213r: NewFMAOpc = X86::VFMSUBSSr231r; break;
+      case X86::VFNMADDPDr213r: NewFMAOpc = X86::VFNMADDPDr231r; break;
+      case X86::VFNMADDPSr213r: NewFMAOpc = X86::VFNMADDPSr231r; break;
+      case X86::VFNMADDSDr213r: NewFMAOpc = X86::VFNMADDSDr231r; break;
+      case X86::VFNMADDSSr213r: NewFMAOpc = X86::VFNMADDSSr231r; break;
+      case X86::VFNMSUBPDr213r: NewFMAOpc = X86::VFNMSUBPDr231r; break;
+      case X86::VFNMSUBPSr213r: NewFMAOpc = X86::VFNMSUBPSr231r; break;
+      case X86::VFNMSUBSDr213r: NewFMAOpc = X86::VFNMSUBSDr231r; break;
+      case X86::VFNMSUBSSr213r: NewFMAOpc = X86::VFNMSUBSSr231r; break;
+      // The custom-inserter dispatch also sends the 256-bit rY forms here.
+      case X86::VFMADDPDr213rY: NewFMAOpc = X86::VFMADDPDr231rY; break;
+      case X86::VFMADDPSr213rY: NewFMAOpc = X86::VFMADDPSr231rY; break;
+      case X86::VFMSUBPDr213rY: NewFMAOpc = X86::VFMSUBPDr231rY; break;
+      case X86::VFMSUBPSr213rY: NewFMAOpc = X86::VFMSUBPSr231rY; break;
+      case X86::VFNMADDPDr213rY: NewFMAOpc = X86::VFNMADDPDr231rY; break;
+      case X86::VFNMADDPSr213rY: NewFMAOpc = X86::VFNMADDPSr231rY; break;
+      case X86::VFNMSUBPDr213rY: NewFMAOpc = X86::VFNMSUBPDr231rY; break;
+      case X86::VFNMSUBPSr213rY: NewFMAOpc = X86::VFNMSUBPSr231rY; break;
+      default: llvm_unreachable("Unrecognized FMA variant.");
+      }
+
+      // Keep the destination and reverse the three source operands so that
+      // the addend comes first, as in the FMA231 pattern above.
+      const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+      MachineInstrBuilder MIB =
+        BuildMI(MF, MI->getDebugLoc(), TII.get(NewFMAOpc))
+        .addOperand(MI->getOperand(0))
+        .addOperand(MI->getOperand(3))
+        .addOperand(MI->getOperand(2))
+        .addOperand(MI->getOperand(1));
+      MBB->insert(MachineBasicBlock::iterator(MI), MIB);
+      MI->eraseFromParent();
+
+      // MI has been erased, so stop here rather than comparing later PHI
+      // inputs against a dangling pointer.
+      return MBB;
+    }
+  }
+
+  return MBB;
+}
+
 MachineBasicBlock *
 X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const { @@ -16194,6 +16269,32 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case TargetOpcode::STACKMAP: case TargetOpcode::PATCHPOINT: return emitPatchPoint(MI, BB); + + case X86::VFMADDPDr213r: + case X86::VFMADDPSr213r: + case X86::VFMADDSDr213r: + case X86::VFMADDSSr213r: + case X86::VFMSUBPDr213r: + case X86::VFMSUBPSr213r: + case X86::VFMSUBSDr213r: + case X86::VFMSUBSSr213r: + case X86::VFNMADDPDr213r: + case X86::VFNMADDPSr213r: + case X86::VFNMADDSDr213r: + case X86::VFNMADDSSr213r: + case X86::VFNMSUBPDr213r: + case X86::VFNMSUBPSr213r: + case X86::VFNMSUBSDr213r: + case X86::VFNMSUBSSr213r: + case X86::VFMADDPDr213rY: + case X86::VFMADDPSr213rY: + case X86::VFMSUBPDr213rY: + case X86::VFMSUBPSr213rY: + case X86::VFNMADDPDr213rY: + case X86::VFNMADDPSr213rY: + case X86::VFNMSUBPDr213rY: + case X86::VFNMSUBPSr213rY: + return emitFMA3Instr(MI, BB); } } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 9b32d121010..d985c98875c 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -972,6 +972,9 @@ namespace llvm { MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI, MachineBasicBlock *MBB) const; + MachineBasicBlock *emitFMA3Instr(MachineInstr *MI, + MachineBasicBlock *MBB) const; + /// Emit nodes that will be selected as "test Op0,Op0", or something /// equivalent, for use with the given x86 condition code. 
SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG) const; diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td index b2cc8209bf9..206f7b600f3 100644 --- a/lib/Target/X86/X86InstrFMA.td +++ b/lib/Target/X86/X86InstrFMA.td @@ -20,7 +20,7 @@ multiclass fma3p_rm opc, string OpcodeStr, PatFrag MemFrag128, PatFrag MemFrag256, ValueType OpVT128, ValueType OpVT256, SDPatternOperator Op = null_frag> { - let isCommutable = 1 in + let isCommutable = 1, usesCustomInserter = 1 in def r : FMA3 opc, string OpcodeStr, [(set VR128:$dst, (OpVT128 (Op VR128:$src2, VR128:$src1, (MemFrag128 addr:$src3))))]>; - let isCommutable = 1 in + let isCommutable = 1, usesCustomInserter = 1 in def rY : FMA3 opc, string OpcodeStr, X86MemOperand x86memop, RegisterClass RC, ValueType OpVT, PatFrag mem_frag, SDPatternOperator OpNode = null_frag> { - let isCommutable = 1 in + let isCommutable = 1, usesCustomInserter = 1 in def r : FMA3