diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index cadec682a43..6884c92163b 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -107,6 +107,8 @@ private:
 
   bool X86SelectShift(const Instruction *I);
 
+  bool X86SelectDivRem(const Instruction *I);
+
   bool X86SelectSelect(const Instruction *I);
 
   bool X86SelectTrunc(const Instruction *I);
@@ -1235,6 +1237,151 @@ bool X86FastISel::X86SelectShift(const Instruction *I) {
   return true;
 }
 
+/// X86SelectDivRem - Select an integer SDiv, SRem, UDiv or URem instruction
+/// \p I by emitting an X86 DIV/IDIV. Only legal i8, i16, i32 and (on 64-bit
+/// subtargets) i64 types are handled. Returns true once the result register
+/// has been recorded for \p I, and false when the type is not handled or an
+/// operand cannot be placed in a register.
+bool X86FastISel::X86SelectDivRem(const Instruction *I) {
+  const static unsigned NumTypes = 4; // i8, i16, i32, i64
+  const static unsigned NumOps   = 4; // SDiv, SRem, UDiv, URem
+  const static bool S = true;  // IsSigned
+  const static bool U = false; // !IsSigned
+  const static unsigned Copy = TargetOpcode::COPY;
+  // For the X86 DIV/IDIV instruction, in most cases the dividend
+  // (numerator) must be in a specific register pair highreg:lowreg,
+  // producing the quotient in lowreg and the remainder in highreg.
+  // For most data types, to set up the instruction, the dividend is
+  // copied into lowreg, and lowreg is sign-extended or zero-extended
+  // into highreg.  The exception is i8, where the dividend is defined
+  // as a single register rather than a register pair, and we
+  // therefore directly sign-extend or zero-extend the dividend into
+  // lowreg, instead of copying, and ignore the highreg.
+  const static struct DivRemEntry {
+    // The following portion depends only on the data type.
+    const TargetRegisterClass *RC;
+    unsigned LowInReg;  // low part of the register pair
+    unsigned HighInReg; // high part of the register pair
+    // The following portion depends on both the data type and the operation.
+    struct DivRemResult {
+      unsigned OpDivRem;        // The specific DIV/IDIV opcode to use.
+      unsigned OpSignExtend;    // Opcode for sign-extending lowreg into
+                                // highreg, or copying a zero into highreg.
+      unsigned OpCopy;          // Opcode for copying dividend into lowreg, or
+                                // zero/sign-extending into lowreg for i8.
+      unsigned DivRemResultReg; // Register containing the desired result.
+      bool IsOpSigned;          // Whether to use signed or unsigned form.
+    } ResultTable[NumOps];
+  } OpTable[NumTypes] = {
+    { &X86::GR8RegClass,  X86::AX,  0, {
+        { X86::IDIV8r,  0,            X86::MOVSX16rr8, X86::AL,  S }, // SDiv
+        { X86::IDIV8r,  0,            X86::MOVSX16rr8, X86::AH,  S }, // SRem
+        { X86::DIV8r,   0,            X86::MOVZX16rr8, X86::AL,  U }, // UDiv
+        { X86::DIV8r,   0,            X86::MOVZX16rr8, X86::AH,  U }, // URem
+      }
+    }, // i8
+    { &X86::GR16RegClass, X86::AX,  X86::DX, {
+        { X86::IDIV16r, X86::CWD,     Copy,            X86::AX,  S }, // SDiv
+        { X86::IDIV16r, X86::CWD,     Copy,            X86::DX,  S }, // SRem
+        { X86::DIV16r,  X86::MOV16r0, Copy,            X86::AX,  U }, // UDiv
+        { X86::DIV16r,  X86::MOV16r0, Copy,            X86::DX,  U }, // URem
+      }
+    }, // i16
+    { &X86::GR32RegClass, X86::EAX, X86::EDX, {
+        { X86::IDIV32r, X86::CDQ,     Copy,            X86::EAX, S }, // SDiv
+        { X86::IDIV32r, X86::CDQ,     Copy,            X86::EDX, S }, // SRem
+        { X86::DIV32r,  X86::MOV32r0, Copy,            X86::EAX, U }, // UDiv
+        { X86::DIV32r,  X86::MOV32r0, Copy,            X86::EDX, U }, // URem
+      }
+    }, // i32
+    { &X86::GR64RegClass, X86::RAX, X86::RDX, {
+        { X86::IDIV64r, X86::CQO,     Copy,            X86::RAX, S }, // SDiv
+        { X86::IDIV64r, X86::CQO,     Copy,            X86::RDX, S }, // SRem
+        { X86::DIV64r,  X86::MOV64r0, Copy,            X86::RAX, U }, // UDiv
+        { X86::DIV64r,  X86::MOV64r0, Copy,            X86::RDX, U }, // URem
+      }
+    }, // i64
+  };
+
+  MVT VT;
+  if (!isTypeLegal(I->getType(), VT))
+    return false;
+
+  // Select the OpTable row (type) and ResultTable entry (operation).
+  unsigned TypeIndex, OpIndex;
+  switch (VT.SimpleTy) {
+  default: return false;
+  case MVT::i8:  TypeIndex = 0; break;
+  case MVT::i16: TypeIndex = 1; break;
+  case MVT::i32: TypeIndex = 2; break;
+  case MVT::i64: TypeIndex = 3;
+    if (!Subtarget->is64Bit())
+      return false;
+    break;
+  }
+
+  switch (I->getOpcode()) {
+  default: llvm_unreachable("Unexpected div/rem opcode");
+  case Instruction::SDiv: OpIndex = 0; break;
+  case Instruction::SRem: OpIndex = 1; break;
+  case Instruction::UDiv: OpIndex = 2; break;
+  case Instruction::URem: OpIndex = 3; break;
+  }
+
+  const DivRemEntry &TypeEntry = OpTable[TypeIndex];
+  const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
+  unsigned Op0Reg = getRegForValue(I->getOperand(0));
+  if (Op0Reg == 0)
+    return false;
+  unsigned Op1Reg = getRegForValue(I->getOperand(1));
+  if (Op1Reg == 0)
+    return false;
+
+  // Move op0 into low-order input register.
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+          TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg);
+  // Zero-extend or sign-extend into high-order input register.
+  if (OpEntry.OpSignExtend) {
+    if (OpEntry.IsOpSigned)
+      // CWD/CDQ/CQO are emitted without any explicit operands.
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+              TII.get(OpEntry.OpSignExtend));
+    else
+      // The unsigned forms write a zero into the high-order register.
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+              TII.get(OpEntry.OpSignExtend), TypeEntry.HighInReg);
+  }
+  // Generate the DIV/IDIV instruction.
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+          TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
+
+  // Copy the fixed output register into a virtual result register. The i8
+  // remainder is special in 64-bit mode: it lives in AH, which cannot be
+  // encoded in an instruction carrying a REX prefix, yet a plain COPY from AH
+  // may have its destination assigned to a byte register that requires one
+  // (e.g. R9B). Instead, copy all of AX, shift the remainder down into the
+  // low byte, and use the 8-bit subregister of the shifted value.
+  unsigned ResultReg;
+  if (OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
+    unsigned SourceSuperReg = createResultReg(&X86::GR16RegClass);
+    unsigned ResultSuperReg = createResultReg(&X86::GR16RegClass);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+            TII.get(Copy), SourceSuperReg).addReg(X86::AX);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+            TII.get(X86::SHR16ri), ResultSuperReg)
+      .addReg(SourceSuperReg).addImm(8);
+    ResultReg = FastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
+                                           /*Kill=*/true, X86::sub_8bit);
+  } else {
+    ResultReg = createResultReg(TypeEntry.RC);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+            TII.get(Copy), ResultReg).addReg(OpEntry.DivRemResultReg);
+  }
+  UpdateValueMap(I, ResultReg);
+
+  return true;
+}
+
 bool X86FastISel::X86SelectSelect(const Instruction *I) {
   MVT VT;
   if (!isTypeLegal(I->getType(), VT))
@@ -2084,6 +2231,11 @@ X86FastISel::TargetSelectInstruction(const Instruction *I) {
   case Instruction::AShr:
   case Instruction::Shl:
     return X86SelectShift(I);
+  case Instruction::SDiv:
+  case Instruction::UDiv:
+  case Instruction::SRem:
+  case Instruction::URem:
+    return X86SelectDivRem(I);
   case Instruction::Select:
     return X86SelectSelect(I);
   case Instruction::Trunc:
diff --git a/test/CodeGen/X86/fast-isel-divrem-x86-64.ll b/test/CodeGen/X86/fast-isel-divrem-x86-64.ll
new file mode 100644
index 00000000000..45494f139e2
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-divrem-x86-64.ll
@@ -0,0 +1,41 @@
+; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
+
+define i64 @test_sdiv64(i64 %dividend, i64 %divisor) nounwind {
+entry:
+  %result = sdiv i64 %dividend, %divisor
+  ret i64 %result
+}
+
+; CHECK: test_sdiv64:
+; CHECK: cqto
+; CHECK: idivq
+
+define i64 @test_srem64(i64 %dividend, i64 %divisor) nounwind {
+entry:
+  %result = srem i64 %dividend, %divisor
+  ret i64 %result
+}
+
+; CHECK: test_srem64:
+; CHECK: cqto
+; CHECK: idivq
+
+define i64 @test_udiv64(i64 %dividend, i64 %divisor) nounwind {
+entry:
+  %result = udiv i64 %dividend, %divisor
+  ret i64 %result
+}
+
+; CHECK: test_udiv64:
+; CHECK: xorl
+; CHECK: divq
+
+define i64 @test_urem64(i64 %dividend, i64 %divisor) nounwind {
+entry:
+  %result = urem i64 %dividend, %divisor
+  ret i64 %result
+}
+
+; CHECK: test_urem64:
+; CHECK: xorl
+; CHECK: divq
diff --git a/test/CodeGen/X86/fast-isel-divrem.ll b/test/CodeGen/X86/fast-isel-divrem.ll
new file mode 100644
index 00000000000..7aba7f7b795
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-divrem.ll
@@ -0,0 +1,122 @@
+; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
+; RUN: llc -mtriple=i686-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
+
+define i8 @test_sdiv8(i8 %dividend, i8 %divisor) nounwind {
+entry:
+  %result = sdiv i8 %dividend, %divisor
+  ret i8 %result
+}
+
+; CHECK: test_sdiv8:
+; CHECK: movsbw
+; CHECK: idivb
+
+define i8 @test_srem8(i8 %dividend, i8 %divisor) nounwind {
+entry:
+  %result = srem i8 %dividend, %divisor
+  ret i8 %result
+}
+
+; CHECK: test_srem8:
+; CHECK: movsbw
+; CHECK: idivb
+
+define i8 @test_udiv8(i8 %dividend, i8 %divisor) nounwind {
+entry:
+  %result = udiv i8 %dividend, %divisor
+  ret i8 %result
+}
+
+; CHECK: test_udiv8:
+; CHECK: movzbw
+; CHECK: divb
+
+define i8 @test_urem8(i8 %dividend, i8 %divisor) nounwind {
+entry:
+  %result = urem i8 %dividend, %divisor
+  ret i8 %result
+}
+
+; CHECK: test_urem8:
+; CHECK: movzbw
+; CHECK: divb
+
+define i16 @test_sdiv16(i16 %dividend, i16 %divisor) nounwind {
+entry:
+  %result = sdiv i16 %dividend, %divisor
+  ret i16 %result
+}
+
+; CHECK: test_sdiv16:
+; CHECK: cwtd
+; CHECK: idivw
+
+define i16 @test_srem16(i16 %dividend, i16 %divisor) nounwind {
+entry:
+  %result = srem i16 %dividend, %divisor
+  ret i16 %result
+}
+
+; CHECK: test_srem16:
+; CHECK: cwtd
+; CHECK: idivw
+
+define i16 @test_udiv16(i16 %dividend, i16 %divisor) nounwind {
+entry:
+  %result = udiv i16 %dividend, %divisor
+  ret i16 %result
+}
+
+; CHECK: test_udiv16:
+; CHECK: xorl
+; CHECK: divw
+
+define i16 @test_urem16(i16 %dividend, i16 %divisor) nounwind {
+entry:
+  %result = urem i16 %dividend, %divisor
+  ret i16 %result
+}
+
+; CHECK: test_urem16:
+; CHECK: xorl
+; CHECK: divw
+
+define i32 @test_sdiv32(i32 %dividend, i32 %divisor) nounwind {
+entry:
+  %result = sdiv i32 %dividend, %divisor
+  ret i32 %result
+}
+
+; CHECK: test_sdiv32:
+; CHECK: cltd
+; CHECK: idivl
+
+define i32 @test_srem32(i32 %dividend, i32 %divisor) nounwind {
+entry:
+  %result = srem i32 %dividend, %divisor
+  ret i32 %result
+}
+
+; CHECK: test_srem32:
+; CHECK: cltd
+; CHECK: idivl
+
+define i32 @test_udiv32(i32 %dividend, i32 %divisor) nounwind {
+entry:
+  %result = udiv i32 %dividend, %divisor
+  ret i32 %result
+}
+
+; CHECK: test_udiv32:
+; CHECK: xorl
+; CHECK: divl
+
+define i32 @test_urem32(i32 %dividend, i32 %divisor) nounwind {
+entry:
+  %result = urem i32 %dividend, %divisor
+  ret i32 %result
+}
+
+; CHECK: test_urem32:
+; CHECK: xorl
+; CHECK: divl