Handle various other important cases of multiplying a long constant immediate. For

example, multiplying X*(1 + (1LL << 32)) now produces:

test:
        mov %ECX, DWORD PTR [%ESP + 4]
        mov %EDX, DWORD PTR [%ESP + 8]
        mov %EAX, %ECX
        add %EDX, %ECX
        ret

[[[Note to Alkis: why isn't linear scan generating this code??  This might be a
 problem with your intervals being too conservative:

test:
        mov %EAX, DWORD PTR [%ESP + 4]
        mov %EDX, DWORD PTR [%ESP + 8]
        add %EDX, %EAX
        ret

end note]]]

Whereas GCC produces this:

T:
        sub     %esp, 12
        mov     %edx, DWORD PTR [%esp+16]
        mov     DWORD PTR [%esp+8], %edi
        mov     %ecx, DWORD PTR [%esp+20]
        xor     %edi, %edi
        mov     DWORD PTR [%esp], %ebx
        mov     %ebx, %edi
        mov     %eax, %edx
        mov     DWORD PTR [%esp+4], %esi
        add     %ebx, %edx
        mov     %edi, DWORD PTR [%esp+8]
        lea     %edx, [%ecx+%ebx]
        mov     %esi, DWORD PTR [%esp+4]
        mov     %ebx, DWORD PTR [%esp]
        add     %esp, 12
        ret

I'm not sure example what GCC is smoking here, but it looks like it has just
confused itself with a bunch of stack slots or something.  The intel compiler
is better, but still not good:

T:
        movl      4(%esp), %edx                                 #2.11
        movl      8(%esp), %eax                                 #2.11
        lea       (%eax,%edx), %ecx                             #3.12
        movl      $1, %eax                                      #3.12
        mull      %edx                                          #3.12
        addl      %ecx, %edx                                    #3.12
        ret                                                     #3.12


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@12693 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chris Lattner 2004-04-06 04:55:43 +00:00
parent 028adc422d
commit 6ab06d5d19
2 changed files with 86 additions and 38 deletions

View File

@ -1893,8 +1893,19 @@ void ISel::doMultiplyConst(MachineBasicBlock *MBB,
MachineBasicBlock::iterator IP,
unsigned DestReg, const Type *DestTy,
unsigned op0Reg, unsigned ConstRHS) {
static const unsigned MOVrrTab[] = {X86::MOV8rr, X86::MOV16rr, X86::MOV32rr};
static const unsigned MOVriTab[] = {X86::MOV8ri, X86::MOV16ri, X86::MOV32ri};
unsigned Class = getClass(DestTy);
if (ConstRHS == 0) {
BuildMI(*MBB, IP, MOVriTab[Class], 1, DestReg).addImm(0);
return;
} else if (ConstRHS == 1) {
BuildMI(*MBB, IP, MOVrrTab[Class], 1, DestReg).addReg(op0Reg);
return;
}
// If the element size is exactly a power of 2, use a shift to get it.
if (unsigned Shift = ExactLog2(ConstRHS)) {
switch (Class) {
@ -1920,10 +1931,6 @@ void ISel::doMultiplyConst(MachineBasicBlock *MBB,
}
// Most general case, emit a normal multiply...
static const unsigned MOVriTab[] = {
X86::MOV8ri, X86::MOV16ri, X86::MOV32ri
};
unsigned TmpReg = makeAnotherReg(DestTy);
BuildMI(*MBB, IP, MOVriTab[Class], 1, TmpReg).addImm(ConstRHS);
@ -1956,31 +1963,48 @@ void ISel::visitMul(BinaryOperator &I) {
unsigned CLow = CI->getRawValue();
unsigned CHi = CI->getRawValue() >> 32;
if (CLow == 0) {
// If the low part of the constant is all zeros, things are simple.
BuildMI(BB, X86::MOV32ri, 1, DestReg).addImm(0);
doMultiplyConst(BB, BB->end(), DestReg+1, Type::UIntTy, Op0Reg, CHi);
return;
}
// Multiply the two low parts... capturing carry into EDX
unsigned Op1RegL = makeAnotherReg(Type::UIntTy);
BuildMI(BB, X86::MOV32ri, 1, Op1RegL).addImm(CLow);
BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg);
BuildMI(BB, X86::MUL32r, 1).addReg(Op1RegL); // AL*BL
unsigned OverflowReg = 0;
if (CLow == 1) {
BuildMI(BB, X86::MOV32rr, 1, DestReg).addReg(Op0Reg);
} else {
unsigned Op1RegL = makeAnotherReg(Type::UIntTy);
OverflowReg = makeAnotherReg(Type::UIntTy);
BuildMI(BB, X86::MOV32ri, 1, Op1RegL).addImm(CLow);
BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg);
BuildMI(BB, X86::MUL32r, 1).addReg(Op1RegL); // AL*BL
unsigned OverflowReg = makeAnotherReg(Type::UIntTy);
BuildMI(BB, X86::MOV32rr, 1, DestReg).addReg(X86::EAX); // AL*BL
BuildMI(BB, X86::MOV32rr, 1, OverflowReg).addReg(X86::EDX); // AL*BL >> 32
BuildMI(BB, X86::MOV32rr, 1, DestReg).addReg(X86::EAX); // AL*BL
BuildMI(BB, X86::MOV32rr, 1,OverflowReg).addReg(X86::EDX);// AL*BL >> 32
}
unsigned AHBLReg = makeAnotherReg(Type::UIntTy); // AH*BL
BuildMI(BB, X86::IMUL32rri, 2, AHBLReg).addReg(Op0Reg+1).addImm(CLow);
doMultiplyConst(BB, BB->end(), AHBLReg, Type::UIntTy, Op0Reg+1, CLow);
unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy);
BuildMI(BB, X86::ADD32rr, 2, // AH*BL+(AL*BL >> 32)
AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg);
unsigned AHBLplusOverflowReg;
if (OverflowReg) {
AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy);
BuildMI(BB, X86::ADD32rr, 2, // AH*BL+(AL*BL >> 32)
AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg);
} else {
AHBLplusOverflowReg = AHBLReg;
}
if (CHi != 0) {
if (CHi == 0) {
BuildMI(BB, X86::MOV32rr, 1, DestReg+1).addReg(AHBLplusOverflowReg);
} else {
unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH
BuildMI(BB, X86::IMUL32rri, 2, ALBHReg).addReg(Op0Reg).addImm(CHi);
doMultiplyConst(BB, BB->end(), ALBHReg, Type::UIntTy, Op0Reg, CHi);
BuildMI(BB, X86::ADD32rr, 2, // AL*BH + AH*BL + (AL*BL >> 32)
DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg);
} else {
BuildMI(BB, X86::MOV32rr, 1, DestReg+1).addReg(AHBLplusOverflowReg);
}
} else {
unsigned Op1Reg = getReg(I.getOperand(1));

View File

@ -1893,8 +1893,19 @@ void ISel::doMultiplyConst(MachineBasicBlock *MBB,
MachineBasicBlock::iterator IP,
unsigned DestReg, const Type *DestTy,
unsigned op0Reg, unsigned ConstRHS) {
static const unsigned MOVrrTab[] = {X86::MOV8rr, X86::MOV16rr, X86::MOV32rr};
static const unsigned MOVriTab[] = {X86::MOV8ri, X86::MOV16ri, X86::MOV32ri};
unsigned Class = getClass(DestTy);
if (ConstRHS == 0) {
BuildMI(*MBB, IP, MOVriTab[Class], 1, DestReg).addImm(0);
return;
} else if (ConstRHS == 1) {
BuildMI(*MBB, IP, MOVrrTab[Class], 1, DestReg).addReg(op0Reg);
return;
}
// If the element size is exactly a power of 2, use a shift to get it.
if (unsigned Shift = ExactLog2(ConstRHS)) {
switch (Class) {
@ -1920,10 +1931,6 @@ void ISel::doMultiplyConst(MachineBasicBlock *MBB,
}
// Most general case, emit a normal multiply...
static const unsigned MOVriTab[] = {
X86::MOV8ri, X86::MOV16ri, X86::MOV32ri
};
unsigned TmpReg = makeAnotherReg(DestTy);
BuildMI(*MBB, IP, MOVriTab[Class], 1, TmpReg).addImm(ConstRHS);
@ -1956,31 +1963,48 @@ void ISel::visitMul(BinaryOperator &I) {
unsigned CLow = CI->getRawValue();
unsigned CHi = CI->getRawValue() >> 32;
if (CLow == 0) {
// If the low part of the constant is all zeros, things are simple.
BuildMI(BB, X86::MOV32ri, 1, DestReg).addImm(0);
doMultiplyConst(BB, BB->end(), DestReg+1, Type::UIntTy, Op0Reg, CHi);
return;
}
// Multiply the two low parts... capturing carry into EDX
unsigned Op1RegL = makeAnotherReg(Type::UIntTy);
BuildMI(BB, X86::MOV32ri, 1, Op1RegL).addImm(CLow);
BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg);
BuildMI(BB, X86::MUL32r, 1).addReg(Op1RegL); // AL*BL
unsigned OverflowReg = 0;
if (CLow == 1) {
BuildMI(BB, X86::MOV32rr, 1, DestReg).addReg(Op0Reg);
} else {
unsigned Op1RegL = makeAnotherReg(Type::UIntTy);
OverflowReg = makeAnotherReg(Type::UIntTy);
BuildMI(BB, X86::MOV32ri, 1, Op1RegL).addImm(CLow);
BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg);
BuildMI(BB, X86::MUL32r, 1).addReg(Op1RegL); // AL*BL
unsigned OverflowReg = makeAnotherReg(Type::UIntTy);
BuildMI(BB, X86::MOV32rr, 1, DestReg).addReg(X86::EAX); // AL*BL
BuildMI(BB, X86::MOV32rr, 1, OverflowReg).addReg(X86::EDX); // AL*BL >> 32
BuildMI(BB, X86::MOV32rr, 1, DestReg).addReg(X86::EAX); // AL*BL
BuildMI(BB, X86::MOV32rr, 1,OverflowReg).addReg(X86::EDX);// AL*BL >> 32
}
unsigned AHBLReg = makeAnotherReg(Type::UIntTy); // AH*BL
BuildMI(BB, X86::IMUL32rri, 2, AHBLReg).addReg(Op0Reg+1).addImm(CLow);
doMultiplyConst(BB, BB->end(), AHBLReg, Type::UIntTy, Op0Reg+1, CLow);
unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy);
BuildMI(BB, X86::ADD32rr, 2, // AH*BL+(AL*BL >> 32)
AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg);
unsigned AHBLplusOverflowReg;
if (OverflowReg) {
AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy);
BuildMI(BB, X86::ADD32rr, 2, // AH*BL+(AL*BL >> 32)
AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg);
} else {
AHBLplusOverflowReg = AHBLReg;
}
if (CHi != 0) {
if (CHi == 0) {
BuildMI(BB, X86::MOV32rr, 1, DestReg+1).addReg(AHBLplusOverflowReg);
} else {
unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH
BuildMI(BB, X86::IMUL32rri, 2, ALBHReg).addReg(Op0Reg).addImm(CHi);
doMultiplyConst(BB, BB->end(), ALBHReg, Type::UIntTy, Op0Reg, CHi);
BuildMI(BB, X86::ADD32rr, 2, // AL*BH + AH*BL + (AL*BL >> 32)
DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg);
} else {
BuildMI(BB, X86::MOV32rr, 1, DestReg+1).addReg(AHBLplusOverflowReg);
}
} else {
unsigned Op1Reg = getReg(I.getOperand(1));