Efficiently handle a long multiplication by a constant. For this testcase:

long %test(long %X) {
        %Y = mul long %X, 123
        ret long %Y
}

we used to generate:

test:
        sub %ESP, 12
        mov DWORD PTR [%ESP + 8], %ESI
        mov DWORD PTR [%ESP + 4], %EDI
        mov DWORD PTR [%ESP], %EBX
        mov %ECX, DWORD PTR [%ESP + 16]
        mov %ESI, DWORD PTR [%ESP + 20]
        mov %EDI, 123
        mov %EBX, 0
        mov %EAX, %ECX
        mul %EDI
        imul %ESI, %EDI
        add %ESI, %EDX
        imul %ECX, %EBX
        add %ESI, %ECX
        mov %EDX, %ESI
        mov %EBX, DWORD PTR [%ESP]
        mov %EDI, DWORD PTR [%ESP + 4]
        mov %ESI, DWORD PTR [%ESP + 8]
        add %ESP, 12
        ret

Now we emit:
test:
        mov %EAX, DWORD PTR [%ESP + 4]
        mov %ECX, DWORD PTR [%ESP + 8]
        mov %EDX, 123
        mul %EDX
        imul %ECX, %ECX, 123
        add %ECX, %EDX
        mov %EDX, %ECX
        ret

Which, incidently, is substantially nicer than what GCC manages:
T:
        sub     %esp, 8
        mov     %eax, 123
        mov     DWORD PTR [%esp], %ebx
        mov     %ebx, DWORD PTR [%esp+16]
        mov     DWORD PTR [%esp+4], %esi
        mov     %esi, DWORD PTR [%esp+12]
        imul    %ecx, %ebx, 123
        mov     %ebx, DWORD PTR [%esp]
        mul     %esi
        mov     %esi, DWORD PTR [%esp+4]
        add     %esp, 8
        lea     %edx, [%ecx+%edx]
        ret


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@12692 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chris Lattner 2004-04-06 04:29:36 +00:00
parent 1e07e0c0c9
commit 028adc422d
2 changed files with 114 additions and 48 deletions

View File

@ -1940,7 +1940,7 @@ void ISel::visitMul(BinaryOperator &I) {
unsigned DestReg = getReg(I);
// Simple scalar multiply?
if (I.getType() != Type::LongTy && I.getType() != Type::ULongTy) {
if (getClass(I.getType()) != cLong) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1))) {
unsigned Val = (unsigned)CI->getRawValue(); // Cannot be 64-bit constant
MachineBasicBlock::iterator MBBI = BB->end();
@ -1951,31 +1951,64 @@ void ISel::visitMul(BinaryOperator &I) {
doMultiply(BB, MBBI, DestReg, I.getType(), Op0Reg, Op1Reg);
}
} else {
unsigned Op1Reg = getReg(I.getOperand(1));
// Long value. We have to do things the hard way...
// Multiply the two low parts... capturing carry into EDX
BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg);
BuildMI(BB, X86::MUL32r, 1).addReg(Op1Reg); // AL*BL
if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1))) {
unsigned CLow = CI->getRawValue();
unsigned CHi = CI->getRawValue() >> 32;
unsigned OverflowReg = makeAnotherReg(Type::UIntTy);
BuildMI(BB, X86::MOV32rr, 1, DestReg).addReg(X86::EAX); // AL*BL
BuildMI(BB, X86::MOV32rr, 1, OverflowReg).addReg(X86::EDX); // AL*BL >> 32
MachineBasicBlock::iterator MBBI = BB->end();
unsigned AHBLReg = makeAnotherReg(Type::UIntTy); // AH*BL
BuildMI(*BB, MBBI, X86::IMUL32rr,2,AHBLReg).addReg(Op0Reg+1).addReg(Op1Reg);
unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy);
BuildMI(*BB, MBBI, X86::ADD32rr, 2, // AH*BL+(AL*BL >> 32)
AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg);
MBBI = BB->end();
unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH
BuildMI(*BB, MBBI, X86::IMUL32rr,2,ALBHReg).addReg(Op0Reg).addReg(Op1Reg+1);
BuildMI(*BB, MBBI, X86::ADD32rr, 2, // AL*BH + AH*BL + (AL*BL >> 32)
DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg);
// Multiply the two low parts... capturing carry into EDX
unsigned Op1RegL = makeAnotherReg(Type::UIntTy);
BuildMI(BB, X86::MOV32ri, 1, Op1RegL).addImm(CLow);
BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg);
BuildMI(BB, X86::MUL32r, 1).addReg(Op1RegL); // AL*BL
unsigned OverflowReg = makeAnotherReg(Type::UIntTy);
BuildMI(BB, X86::MOV32rr, 1, DestReg).addReg(X86::EAX); // AL*BL
BuildMI(BB, X86::MOV32rr, 1, OverflowReg).addReg(X86::EDX); // AL*BL >> 32
unsigned AHBLReg = makeAnotherReg(Type::UIntTy); // AH*BL
BuildMI(BB, X86::IMUL32rri, 2, AHBLReg).addReg(Op0Reg+1).addImm(CLow);
unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy);
BuildMI(BB, X86::ADD32rr, 2, // AH*BL+(AL*BL >> 32)
AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg);
if (CHi != 0) {
unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH
BuildMI(BB, X86::IMUL32rri, 2, ALBHReg).addReg(Op0Reg).addImm(CHi);
BuildMI(BB, X86::ADD32rr, 2, // AL*BH + AH*BL + (AL*BL >> 32)
DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg);
} else {
BuildMI(BB, X86::MOV32rr, 1, DestReg+1).addReg(AHBLplusOverflowReg);
}
} else {
unsigned Op1Reg = getReg(I.getOperand(1));
// Multiply the two low parts... capturing carry into EDX
BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg);
BuildMI(BB, X86::MUL32r, 1).addReg(Op1Reg); // AL*BL
unsigned OverflowReg = makeAnotherReg(Type::UIntTy);
BuildMI(BB, X86::MOV32rr, 1, DestReg).addReg(X86::EAX); // AL*BL
BuildMI(BB, X86::MOV32rr, 1, OverflowReg).addReg(X86::EDX); // AL*BL >> 32
MachineBasicBlock::iterator MBBI = BB->end();
unsigned AHBLReg = makeAnotherReg(Type::UIntTy); // AH*BL
BuildMI(*BB, MBBI, X86::IMUL32rr, 2,
AHBLReg).addReg(Op0Reg+1).addReg(Op1Reg);
unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy);
BuildMI(*BB, MBBI, X86::ADD32rr, 2, // AH*BL+(AL*BL >> 32)
AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg);
MBBI = BB->end();
unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH
BuildMI(*BB, MBBI, X86::IMUL32rr, 2,
ALBHReg).addReg(Op0Reg).addReg(Op1Reg+1);
BuildMI(*BB, MBBI, X86::ADD32rr, 2, // AL*BH + AH*BL + (AL*BL >> 32)
DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg);
}
}
}

View File

@ -1940,7 +1940,7 @@ void ISel::visitMul(BinaryOperator &I) {
unsigned DestReg = getReg(I);
// Simple scalar multiply?
if (I.getType() != Type::LongTy && I.getType() != Type::ULongTy) {
if (getClass(I.getType()) != cLong) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1))) {
unsigned Val = (unsigned)CI->getRawValue(); // Cannot be 64-bit constant
MachineBasicBlock::iterator MBBI = BB->end();
@ -1951,31 +1951,64 @@ void ISel::visitMul(BinaryOperator &I) {
doMultiply(BB, MBBI, DestReg, I.getType(), Op0Reg, Op1Reg);
}
} else {
unsigned Op1Reg = getReg(I.getOperand(1));
// Long value. We have to do things the hard way...
// Multiply the two low parts... capturing carry into EDX
BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg);
BuildMI(BB, X86::MUL32r, 1).addReg(Op1Reg); // AL*BL
if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1))) {
unsigned CLow = CI->getRawValue();
unsigned CHi = CI->getRawValue() >> 32;
unsigned OverflowReg = makeAnotherReg(Type::UIntTy);
BuildMI(BB, X86::MOV32rr, 1, DestReg).addReg(X86::EAX); // AL*BL
BuildMI(BB, X86::MOV32rr, 1, OverflowReg).addReg(X86::EDX); // AL*BL >> 32
MachineBasicBlock::iterator MBBI = BB->end();
unsigned AHBLReg = makeAnotherReg(Type::UIntTy); // AH*BL
BuildMI(*BB, MBBI, X86::IMUL32rr,2,AHBLReg).addReg(Op0Reg+1).addReg(Op1Reg);
unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy);
BuildMI(*BB, MBBI, X86::ADD32rr, 2, // AH*BL+(AL*BL >> 32)
AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg);
MBBI = BB->end();
unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH
BuildMI(*BB, MBBI, X86::IMUL32rr,2,ALBHReg).addReg(Op0Reg).addReg(Op1Reg+1);
BuildMI(*BB, MBBI, X86::ADD32rr, 2, // AL*BH + AH*BL + (AL*BL >> 32)
DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg);
// Multiply the two low parts... capturing carry into EDX
unsigned Op1RegL = makeAnotherReg(Type::UIntTy);
BuildMI(BB, X86::MOV32ri, 1, Op1RegL).addImm(CLow);
BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg);
BuildMI(BB, X86::MUL32r, 1).addReg(Op1RegL); // AL*BL
unsigned OverflowReg = makeAnotherReg(Type::UIntTy);
BuildMI(BB, X86::MOV32rr, 1, DestReg).addReg(X86::EAX); // AL*BL
BuildMI(BB, X86::MOV32rr, 1, OverflowReg).addReg(X86::EDX); // AL*BL >> 32
unsigned AHBLReg = makeAnotherReg(Type::UIntTy); // AH*BL
BuildMI(BB, X86::IMUL32rri, 2, AHBLReg).addReg(Op0Reg+1).addImm(CLow);
unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy);
BuildMI(BB, X86::ADD32rr, 2, // AH*BL+(AL*BL >> 32)
AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg);
if (CHi != 0) {
unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH
BuildMI(BB, X86::IMUL32rri, 2, ALBHReg).addReg(Op0Reg).addImm(CHi);
BuildMI(BB, X86::ADD32rr, 2, // AL*BH + AH*BL + (AL*BL >> 32)
DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg);
} else {
BuildMI(BB, X86::MOV32rr, 1, DestReg+1).addReg(AHBLplusOverflowReg);
}
} else {
unsigned Op1Reg = getReg(I.getOperand(1));
// Multiply the two low parts... capturing carry into EDX
BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg);
BuildMI(BB, X86::MUL32r, 1).addReg(Op1Reg); // AL*BL
unsigned OverflowReg = makeAnotherReg(Type::UIntTy);
BuildMI(BB, X86::MOV32rr, 1, DestReg).addReg(X86::EAX); // AL*BL
BuildMI(BB, X86::MOV32rr, 1, OverflowReg).addReg(X86::EDX); // AL*BL >> 32
MachineBasicBlock::iterator MBBI = BB->end();
unsigned AHBLReg = makeAnotherReg(Type::UIntTy); // AH*BL
BuildMI(*BB, MBBI, X86::IMUL32rr, 2,
AHBLReg).addReg(Op0Reg+1).addReg(Op1Reg);
unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy);
BuildMI(*BB, MBBI, X86::ADD32rr, 2, // AH*BL+(AL*BL >> 32)
AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg);
MBBI = BB->end();
unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH
BuildMI(*BB, MBBI, X86::IMUL32rr, 2,
ALBHReg).addReg(Op0Reg).addReg(Op1Reg+1);
BuildMI(*BB, MBBI, X86::ADD32rr, 2, // AL*BH + AH*BL + (AL*BL >> 32)
DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg);
}
}
}