[Hexagon] Adding vmux instruction. Removing old transfer instructions and updating references.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@226184 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Colin LeMahieu 2015-01-15 18:16:00 +00:00
parent 4f158a708b
commit 500b0d97a1
4 changed files with 62 additions and 303 deletions

View File

@ -1069,6 +1069,25 @@ def: T_cmp64_rr_pat<C2_cmpgtup, setugt>;
def: T_cmp64_rr_pat<C2_cmpgtp, RevCmp<setlt>>;
def: T_cmp64_rr_pat<C2_cmpgtup, RevCmp<setult>>;
// C2_vmux: register-pair mux, printed as "Rdd = vmux(Pu, Rss, Rtt)".
// Inputs are one predicate register and two 64-bit register pairs; the
// result is a 64-bit register pair. The pattern list is empty, so no
// selection pattern is attached directly to this def.
let isCodeGenOnly = 0 in
def C2_vmux : ALU64_rr<(outs DoubleRegs:$Rd),
(ins PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt),
"$Rd = vmux($Pu, $Rs, $Rt)", [], "", ALU64_tc_1_SLOT23> {
let hasSideEffects = 0;
// Operand encoding fields: 5 bits for each register-pair operand and
// 2 bits for the predicate register.
bits<5> Rd;
bits<2> Pu;
bits<5> Rs;
bits<5> Rt;
// Fixed opcode bits followed by the operand placement in the
// instruction word.
let IClass = 0b1101;
let Inst{27-24} = 0b0001;
let Inst{20-16} = Rs;
let Inst{12-8} = Rt;
let Inst{6-5} = Pu;
let Inst{4-0} = Rd;
}
class T_ALU64_rr<string mnemonic, string suffix, bits<4> RegType,
bits<3> MajOp, bits<3> MinOp, bit OpsRev, bit IsComm,
string Op2Pfx>

View File

@ -452,6 +452,22 @@ def F2_sffma_sc: MInst <
let Inst{4-0} = Rx;
}
// MUX_ir_f: pseudo-instruction (isPseudo = 1) matching an f32 select whose
// true value is a register ($src2) and whose false value is a floating-point
// immediate ($src3). The immediate is operand 3 and is flagged as the
// extendable operand (opExtendable = 3). Only available with HasV5T.
// NOTE(review): despite the `_ir` suffix, the immediate is the LAST source
// operand here; confirm the naming convention against the users of this def.
let isExtended = 1, isExtentSigned = 1, opExtentBits = 8, opExtendable = 3,
isPseudo = 1, InputType = "imm" in
def MUX_ir_f : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, f32Ext:$src3),
"$dst = mux($src1, $src2, #$src3)",
[(set F32:$dst, (f32 (select I1:$src1, F32:$src2, fpimm:$src3)))]>,
Requires<[HasV5T]>;
// MUX_ri_f: pseudo-instruction (isPseudo = 1) matching an f32 select whose
// true value is a floating-point immediate ($src2) and whose false value is
// a register ($src3). The immediate is operand 2 and is flagged as the
// extendable operand (opExtendable = 2). Only available with HasV5T.
// NOTE(review): despite the `_ri` suffix, the immediate is the FIRST source
// operand after the predicate; confirm the naming convention against the
// users of this def.
let isExtended = 1, isExtentSigned = 1, opExtentBits = 8, opExtendable = 2,
isPseudo = 1, InputType = "imm" in
def MUX_ri_f : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, f32Ext:$src2, IntRegs:$src3),
"$dst = mux($src1, #$src2, $src3)",
[(set F32:$dst, (f32 (select I1:$src1, fpimm:$src2, F32:$src3)))]>,
Requires<[HasV5T]>;
// Classify floating-point value
let isFP = 1, isCodeGenOnly = 0 in
def F2_sfclass : T_TEST_BIT_IMM<"sfclass", 0b111>;
@ -619,139 +635,6 @@ def : Pat<(i1 (setune (f64 DoubleRegs:$src1), (fpimm:$src2))),
(f64 (CONST64_Float_Real fpimm:$src2)))))>,
Requires<[HasV5T]>;
// Convert Integer to Floating Point.
// Each def maps one sint_to_fp / uint_to_fp node to a convert_<src>2<dst>
// instruction. In the mnemonic, "w"/"uw" is a signed/unsigned i32 source in
// IntRegs, "d"/"ud" is a signed/unsigned i64 source in DoubleRegs, "sf" is an
// f32 result in IntRegs and "df" is an f64 result in DoubleRegs.
// All of them require HasV5T.

// i64 -> f32
def CONVERT_d2sf : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
"$dst = convert_d2sf($src)",
[(set (f32 IntRegs:$dst), (sint_to_fp (i64 DoubleRegs:$src)))]>,
Requires<[HasV5T]>;
def CONVERT_ud2sf : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
"$dst = convert_ud2sf($src)",
[(set (f32 IntRegs:$dst), (uint_to_fp (i64 DoubleRegs:$src)))]>,
Requires<[HasV5T]>;
// i32 -> f32
def CONVERT_uw2sf : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
"$dst = convert_uw2sf($src)",
[(set (f32 IntRegs:$dst), (uint_to_fp (i32 IntRegs:$src)))]>,
Requires<[HasV5T]>;
def CONVERT_w2sf : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
"$dst = convert_w2sf($src)",
[(set (f32 IntRegs:$dst), (sint_to_fp (i32 IntRegs:$src)))]>,
Requires<[HasV5T]>;
// i64 -> f64
def CONVERT_d2df : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
"$dst = convert_d2df($src)",
[(set (f64 DoubleRegs:$dst), (sint_to_fp (i64 DoubleRegs:$src)))]>,
Requires<[HasV5T]>;
def CONVERT_ud2df : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
"$dst = convert_ud2df($src)",
[(set (f64 DoubleRegs:$dst), (uint_to_fp (i64 DoubleRegs:$src)))]>,
Requires<[HasV5T]>;
// i32 -> f64
def CONVERT_uw2df : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
"$dst = convert_uw2df($src)",
[(set (f64 DoubleRegs:$dst), (uint_to_fp (i32 IntRegs:$src)))]>,
Requires<[HasV5T]>;
def CONVERT_w2df : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
"$dst = convert_w2df($src)",
[(set (f64 DoubleRegs:$dst), (sint_to_fp (i32 IntRegs:$src)))]>,
Requires<[HasV5T]>;
// Convert Floating Point to Integer - default.
// Each def maps one fp_to_sint / fp_to_uint node to the ":chop" form of the
// corresponding convert_<src>2<dst> instruction. "sf"/"df" is an f32/f64
// source; "w"/"uw" is a signed/unsigned i32 result and "d"/"ud" is a
// signed/unsigned i64 result. All of them require HasV5T.

// f64 -> i32
def CONVERT_df2uw : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
"$dst = convert_df2uw($src):chop",
[(set (i32 IntRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>,
Requires<[HasV5T]>;
def CONVERT_df2w : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
"$dst = convert_df2w($src):chop",
[(set (i32 IntRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>,
Requires<[HasV5T]>;
// f32 -> i32
def CONVERT_sf2uw : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
"$dst = convert_sf2uw($src):chop",
[(set (i32 IntRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>,
Requires<[HasV5T]>;
def CONVERT_sf2w : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
"$dst = convert_sf2w($src):chop",
[(set (i32 IntRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>,
Requires<[HasV5T]>;
// f64 -> i64
def CONVERT_df2d : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
"$dst = convert_df2d($src):chop",
[(set (i64 DoubleRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>,
Requires<[HasV5T]>;
def CONVERT_df2ud : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
"$dst = convert_df2ud($src):chop",
[(set (i64 DoubleRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>,
Requires<[HasV5T]>;
// f32 -> i64
def CONVERT_sf2d : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
"$dst = convert_sf2d($src):chop",
[(set (i64 DoubleRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>,
Requires<[HasV5T]>;
def CONVERT_sf2ud : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
"$dst = convert_sf2ud($src):chop",
[(set (i64 DoubleRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>,
Requires<[HasV5T]>;
// Convert Floating Point to Integer: non-chopped.
// These defs match the same fp_to_sint / fp_to_uint nodes as the ":chop"
// defs but print the instruction without the ":chop" suffix. They are
// additionally gated on IEEERndNearV5T and carry AddedComplexity = 20.
// NOTE(review): the AddedComplexity bump presumably makes these win over the
// ":chop" patterns whenever both predicates hold; confirm.

// f64 -> i32
let AddedComplexity = 20 in
def CONVERT_df2uw_nchop : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
"$dst = convert_df2uw($src)",
[(set (i32 IntRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>,
Requires<[HasV5T, IEEERndNearV5T]>;
let AddedComplexity = 20 in
def CONVERT_df2w_nchop : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src),
"$dst = convert_df2w($src)",
[(set (i32 IntRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>,
Requires<[HasV5T, IEEERndNearV5T]>;
// f32 -> i32
let AddedComplexity = 20 in
def CONVERT_sf2uw_nchop : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
"$dst = convert_sf2uw($src)",
[(set (i32 IntRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>,
Requires<[HasV5T, IEEERndNearV5T]>;
let AddedComplexity = 20 in
def CONVERT_sf2w_nchop : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
"$dst = convert_sf2w($src)",
[(set (i32 IntRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>,
Requires<[HasV5T, IEEERndNearV5T]>;
// f64 -> i64
let AddedComplexity = 20 in
def CONVERT_df2d_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
"$dst = convert_df2d($src)",
[(set (i64 DoubleRegs:$dst), (fp_to_sint (f64 DoubleRegs:$src)))]>,
Requires<[HasV5T, IEEERndNearV5T]>;
let AddedComplexity = 20 in
def CONVERT_df2ud_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
"$dst = convert_df2ud($src)",
[(set (i64 DoubleRegs:$dst), (fp_to_uint (f64 DoubleRegs:$src)))]>,
Requires<[HasV5T, IEEERndNearV5T]>;
// f32 -> i64
let AddedComplexity = 20 in
def CONVERT_sf2d_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
"$dst = convert_sf2d($src)",
[(set (i64 DoubleRegs:$dst), (fp_to_sint (f32 IntRegs:$src)))]>,
Requires<[HasV5T, IEEERndNearV5T]>;
let AddedComplexity = 20 in
def CONVERT_sf2ud_nchop : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src),
"$dst = convert_sf2ud($src)",
[(set (i64 DoubleRegs:$dst), (fp_to_uint (f32 IntRegs:$src)))]>,
Requires<[HasV5T, IEEERndNearV5T]>;
// Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp].
def : Pat <(i32 (bitconvert (f32 IntRegs:$src))),
(i32 (A2_tfr IntRegs:$src))>,
@ -769,117 +652,30 @@ def : Pat <(f64 (bitconvert (i64 DoubleRegs:$src))),
(f64 (A2_tfrp DoubleRegs:$src))>,
Requires<[HasV5T]>;
// FMADD_sp: single-precision multiply-accumulate, printed as
// "Rd += sfmpy(Rs, Rt)". Matches fma($src2, $src3, $src1); the accumulator
// input $src1 is tied to the result register through the "$src1 = $dst"
// constraint. Requires HasV5T.
def FMADD_sp : ALU64_acc<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
"$dst += sfmpy($src2, $src3)",
[(set (f32 IntRegs:$dst),
(fma IntRegs:$src2, IntRegs:$src3, IntRegs:$src1))],
"$src1 = $dst">,
Requires<[HasV5T]>;
// Floating point max/min.
// Both defs recognise a compare-and-select idiom on f32 values and carry
// AddedComplexity = 100. Both require HasV5T.

// FMAX_sp: matches select(setolt($src2, $src1), $src1, $src2), i.e. $src1 is
// chosen when $src2 is ordered-less-than $src1.
let AddedComplexity = 100 in
def FMAX_sp : ALU64_rr<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = sfmax($src1, $src2)",
[(set IntRegs:$dst, (f32 (select (i1 (setolt IntRegs:$src2,
IntRegs:$src1)),
IntRegs:$src1,
IntRegs:$src2)))]>,
Requires<[HasV5T]>;
// FMIN_sp: matches select(setogt($src2, $src1), $src1, $src2), i.e. $src1 is
// chosen when $src2 is ordered-greater-than $src1.
let AddedComplexity = 100 in
def FMIN_sp : ALU64_rr<(outs IntRegs:$dst),
(ins IntRegs:$src1, IntRegs:$src2),
"$dst = sfmin($src1, $src2)",
[(set IntRegs:$dst, (f32 (select (i1 (setogt IntRegs:$src2,
IntRegs:$src1)),
IntRegs:$src1,
IntRegs:$src2)))]>,
Requires<[HasV5T]>;
// Pseudo instruction to encode a set of conditional transfers.
// This instruction is used instead of a mux and trades-off codesize
// for performance. We conduct this transformation optimistically in
// the hope that these instructions get promoted to dot-new transfers.
//
// The floating-point variants below each match a select on a predicate
// register. Their assembly string is "Error; should not emit", so they are
// expected to be rewritten before emission. The suffix names the kinds of the
// two selected values: r = register, i = floating-point immediate, rr64 =
// 64-bit register pair. All of them require HasV5T.

// f32 select between two registers.
let AddedComplexity = 100, isPredicated = 1 in
def TFR_condset_rr_f : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
IntRegs:$src2,
IntRegs:$src3),
"Error; should not emit",
[(set IntRegs:$dst, (f32 (select PredRegs:$src1,
IntRegs:$src2,
IntRegs:$src3)))]>,
Requires<[HasV5T]>;
// f64 select between two register pairs.
let AddedComplexity = 100, isPredicated = 1 in
def TFR_condset_rr64_f : ALU32_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
DoubleRegs:$src2,
DoubleRegs:$src3),
"Error; should not emit",
[(set DoubleRegs:$dst, (f64 (select PredRegs:$src1,
DoubleRegs:$src2,
DoubleRegs:$src3)))]>,
Requires<[HasV5T]>;
// f32 select: register when the predicate is true, immediate otherwise.
let AddedComplexity = 100, isPredicated = 1 in
def TFR_condset_ri_f : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, IntRegs:$src2, f32imm:$src3),
"Error; should not emit",
[(set IntRegs:$dst,
(f32 (select PredRegs:$src1, IntRegs:$src2, fpimm:$src3)))]>,
Requires<[HasV5T]>;
// f32 select: immediate when the predicate is true, register otherwise.
let AddedComplexity = 100, isPredicated = 1 in
def TFR_condset_ir_f : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, f32imm:$src2, IntRegs:$src3),
"Error; should not emit",
[(set IntRegs:$dst,
(f32 (select PredRegs:$src1, fpimm:$src2, IntRegs:$src3)))]>,
Requires<[HasV5T]>;
// f32 select between two immediates.
let AddedComplexity = 100, isPredicated = 1 in
def TFR_condset_ii_f : ALU32_rr<(outs IntRegs:$dst),
(ins PredRegs:$src1, f32imm:$src2, f32imm:$src3),
"Error; should not emit",
[(set IntRegs:$dst, (f32 (select PredRegs:$src1,
fpimm:$src2,
fpimm:$src3)))]>,
Requires<[HasV5T]>;
def : Pat <(select (i1 (setult (f32 IntRegs:$src1), (f32 IntRegs:$src2))),
(f32 IntRegs:$src3),
(f32 IntRegs:$src4)),
(TFR_condset_rr_f (F2_sfcmpgt IntRegs:$src2, IntRegs:$src1), IntRegs:$src4,
(C2_mux (F2_sfcmpgt IntRegs:$src2, IntRegs:$src1), IntRegs:$src4,
IntRegs:$src3)>, Requires<[HasV5T]>;
def : Pat <(select (i1 (setult (f64 DoubleRegs:$src1), (f64 DoubleRegs:$src2))),
(f64 DoubleRegs:$src3),
(f64 DoubleRegs:$src4)),
(TFR_condset_rr64_f (F2_dfcmpgt DoubleRegs:$src2, DoubleRegs:$src1),
(C2_vmux (F2_dfcmpgt DoubleRegs:$src2, DoubleRegs:$src1),
DoubleRegs:$src4, DoubleRegs:$src3)>, Requires<[HasV5T]>;
// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i).
def : Pat <(select (not PredRegs:$src1), fpimm:$src2, fpimm:$src3),
(TFR_condset_ii_f PredRegs:$src1, fpimm:$src3, fpimm:$src2)>;
// Map from p0 = pnot(p0); r0 = select(p0, #i, r1)
// => r0 = TFR_condset_ri(p0, r1, #i)
def : Pat <(select (not PredRegs:$src1), fpimm:$src2, IntRegs:$src3),
(TFR_condset_ri_f PredRegs:$src1, IntRegs:$src3, fpimm:$src2)>;
(MUX_ri_f PredRegs:$src1, IntRegs:$src3, fpimm:$src2)>;
// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i)
// => r0 = TFR_condset_ir(p0, #i, r1)
def : Pat <(select (not PredRegs:$src1), IntRegs:$src2, fpimm:$src3),
(TFR_condset_ir_f PredRegs:$src1, fpimm:$src3, IntRegs:$src2)>;
(MUX_ir_f PredRegs:$src1, fpimm:$src3, IntRegs:$src2)>;
def : Pat <(i32 (fp_to_sint (f64 DoubleRegs:$src1))),
(i32 (EXTRACT_SUBREG (i64 (CONVERT_df2d (f64 DoubleRegs:$src1))), subreg_loreg))>,
(i32 (EXTRACT_SUBREG (i64 (F2_conv_df2d_chop (f64 DoubleRegs:$src1))), subreg_loreg))>,
Requires<[HasV5T]>;
def : Pat <(fabs (f32 IntRegs:$src1)),

View File

@ -90,39 +90,8 @@ bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) {
for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
++MII) {
MachineInstr *MI = MII;
int Opc1, Opc2;
switch(MI->getOpcode()) {
case Hexagon::TFR_condset_rr_f:
case Hexagon::TFR_condset_rr64_f: {
int DestReg = MI->getOperand(0).getReg();
int SrcReg1 = MI->getOperand(2).getReg();
int SrcReg2 = MI->getOperand(3).getReg();
if (MI->getOpcode() == Hexagon::TFR_condset_rr_f) {
Opc1 = Hexagon::A2_tfrt;
Opc2 = Hexagon::A2_tfrf;
}
else if (MI->getOpcode() == Hexagon::TFR_condset_rr64_f) {
Opc1 = Hexagon::A2_tfrpt;
Opc2 = Hexagon::A2_tfrpf;
}
// Minor optimization: do not emit the predicated copy if the source
// and the destination are the same register.
if (DestReg != SrcReg1) {
BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Opc1),
DestReg).addReg(MI->getOperand(1).getReg()).addReg(SrcReg1);
}
if (DestReg != SrcReg2) {
BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Opc2),
DestReg).addReg(MI->getOperand(1).getReg()).addReg(SrcReg2);
}
MII = MBB->erase(MI);
--MII;
break;
}
case Hexagon::TFR_condset_ri:
case Hexagon::TFR_condset_ri_f: {
case Hexagon::TFR_condset_ri: {
int DestReg = MI->getOperand(0).getReg();
int SrcReg1 = MI->getOperand(2).getReg();
@ -133,38 +102,23 @@ bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) {
TII->get(Hexagon::A2_tfrt), DestReg).
addReg(MI->getOperand(1).getReg()).addReg(SrcReg1);
}
if (MI->getOpcode() == Hexagon::TFR_condset_ri ) {
BuildMI(*MBB, MII, MI->getDebugLoc(),
TII->get(Hexagon::C2_cmoveif), DestReg).
addReg(MI->getOperand(1).getReg()).
addImm(MI->getOperand(3).getImm());
} else if (MI->getOpcode() == Hexagon::TFR_condset_ri_f ) {
BuildMI(*MBB, MII, MI->getDebugLoc(),
TII->get(Hexagon::TFRI_cNotPt_f), DestReg).
addReg(MI->getOperand(1).getReg()).
addFPImm(MI->getOperand(3).getFPImm());
}
BuildMI(*MBB, MII, MI->getDebugLoc(),
TII->get(Hexagon::C2_cmoveif), DestReg).
addReg(MI->getOperand(1).getReg()).
addImm(MI->getOperand(3).getImm());
MII = MBB->erase(MI);
--MII;
break;
}
case Hexagon::TFR_condset_ir:
case Hexagon::TFR_condset_ir_f: {
case Hexagon::TFR_condset_ir: {
int DestReg = MI->getOperand(0).getReg();
int SrcReg2 = MI->getOperand(3).getReg();
if (MI->getOpcode() == Hexagon::TFR_condset_ir ) {
BuildMI(*MBB, MII, MI->getDebugLoc(),
TII->get(Hexagon::C2_cmoveit), DestReg).
addReg(MI->getOperand(1).getReg()).
addImm(MI->getOperand(2).getImm());
} else if (MI->getOpcode() == Hexagon::TFR_condset_ir_f ) {
BuildMI(*MBB, MII, MI->getDebugLoc(),
TII->get(Hexagon::TFRI_cPt_f), DestReg).
addReg(MI->getOperand(1).getReg()).
addFPImm(MI->getOperand(2).getFPImm());
}
BuildMI(*MBB, MII, MI->getDebugLoc(),
TII->get(Hexagon::C2_cmoveit), DestReg).
addReg(MI->getOperand(1).getReg()).
addImm(MI->getOperand(2).getImm());
// Do not emit the predicated copy if the source and
// the destination are the same register.
@ -177,30 +131,18 @@ bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) {
--MII;
break;
}
case Hexagon::TFR_condset_ii:
case Hexagon::TFR_condset_ii_f: {
case Hexagon::TFR_condset_ii: {
int DestReg = MI->getOperand(0).getReg();
int SrcReg1 = MI->getOperand(1).getReg();
if (MI->getOpcode() == Hexagon::TFR_condset_ii ) {
int Immed1 = MI->getOperand(2).getImm();
int Immed2 = MI->getOperand(3).getImm();
BuildMI(*MBB, MII, MI->getDebugLoc(),
TII->get(Hexagon::C2_cmoveit),
DestReg).addReg(SrcReg1).addImm(Immed1);
BuildMI(*MBB, MII, MI->getDebugLoc(),
TII->get(Hexagon::C2_cmoveif),
DestReg).addReg(SrcReg1).addImm(Immed2);
} else if (MI->getOpcode() == Hexagon::TFR_condset_ii_f ) {
BuildMI(*MBB, MII, MI->getDebugLoc(),
TII->get(Hexagon::TFRI_cPt_f), DestReg).
addReg(SrcReg1).
addFPImm(MI->getOperand(2).getFPImm());
BuildMI(*MBB, MII, MI->getDebugLoc(),
TII->get(Hexagon::TFRI_cNotPt_f), DestReg).
addReg(SrcReg1).
addFPImm(MI->getOperand(3).getFPImm());
}
int Immed1 = MI->getOperand(2).getImm();
int Immed2 = MI->getOperand(3).getImm();
BuildMI(*MBB, MII, MI->getDebugLoc(),
TII->get(Hexagon::C2_cmoveit),
DestReg).addReg(SrcReg1).addImm(Immed1);
BuildMI(*MBB, MII, MI->getDebugLoc(),
TII->get(Hexagon::C2_cmoveif),
DestReg).addReg(SrcReg1).addImm(Immed2);
MII = MBB->erase(MI);
--MII;
break;

View File

@ -63,4 +63,6 @@
0x03 0xd5 0x31 0xc7
# CHECK: p3 = !tstbit(r17, r21)
0x11 0xc2 0x03 0x89
# CHECK: r17 = vitpack(p3, p2)
# CHECK: r17 = vitpack(p3, p2)
0x70 0xde 0x14 0xd1
# CHECK: r17:16 = vmux(p3, r21:20, r31:30)