From 43a5ff8d402dcd71629d1ff9f32e8f46806ab8e3 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Mon, 20 Oct 2003 05:53:31 +0000 Subject: [PATCH] Emit x86 instructions for: A = B op C, where A and B are 16-bit registers, C is a constant which can be sign-extended from 8 bits without value loss, and op is one of: add, sub, imul, and, or, xor. This allows the JIT to emit the one byte version of the constant instead of the two or 4 byte version. Because these instructions are very common, this can save a LOT of code space. For example, I sampled two benchmarks, 176.gcc and 254.gap. BM Old New Reduction 176.gcc 2673621 2548962 4.89% 254.gap 498261 475104 4.87% Note that while the percentage is not spectacular, this did eliminate 124.6 _KILOBYTES_ of codespace from gcc. Not bad. Note that this doesn't affect the llc version at all, because the assembler already does this optimization. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@9284 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/PeepholeOptimizer.cpp | 40 ++++++++++++++++++++++++++++ lib/Target/X86/X86InstrInfo.td | 14 ++++++++++ lib/Target/X86/X86PeepholeOpt.cpp | 40 ++++++++++++++++++++++++++++ 3 files changed, 94 insertions(+) diff --git a/lib/Target/X86/PeepholeOptimizer.cpp b/lib/Target/X86/PeepholeOptimizer.cpp index 559baeeae9d..efb6cc3e6a3 100644 --- a/lib/Target/X86/PeepholeOptimizer.cpp +++ b/lib/Target/X86/PeepholeOptimizer.cpp @@ -51,6 +51,46 @@ bool PH::PeepholeOptimize(MachineBasicBlock &MBB, } return false; + // A large number of X86 instructions have forms which take an 8-bit + // immediate despite the fact that the operands are 16 or 32 bits. Because + // this can save three bytes of code size (and icache space), we want to + // shrink them if possible. 
+ case X86::ADDri16: case X86::ADDri32: + case X86::SUBri16: case X86::SUBri32: + case X86::IMULri16: case X86::IMULri32: + case X86::ANDri16: case X86::ANDri32: + case X86::ORri16: case X86::ORri32: + case X86::XORri16: case X86::XORri32: + assert(MI->getNumOperands() == 3 && "These should all have 3 operands!"); + if (MI->getOperand(2).isImmediate()) { + int Val = MI->getOperand(2).getImmedValue(); + // If the value is the same when signed extended from 8 bits... + if (Val == (signed int)(signed char)Val) { + unsigned Opcode; + switch (MI->getOpcode()) { + default: assert(0 && "Unknown opcode value!"); + case X86::ADDri16: Opcode = X86::ADDri16b; break; + case X86::ADDri32: Opcode = X86::ADDri32b; break; + case X86::SUBri16: Opcode = X86::SUBri16b; break; + case X86::SUBri32: Opcode = X86::SUBri32b; break; + case X86::IMULri16: Opcode = X86::IMULri16b; break; + case X86::IMULri32: Opcode = X86::IMULri32b; break; + case X86::ANDri16: Opcode = X86::ANDri16b; break; + case X86::ANDri32: Opcode = X86::ANDri32b; break; + case X86::ORri16: Opcode = X86::ORri16b; break; + case X86::ORri32: Opcode = X86::ORri32b; break; + case X86::XORri16: Opcode = X86::XORri16b; break; + case X86::XORri32: Opcode = X86::XORri32b; break; + } + unsigned R0 = MI->getOperand(0).getReg(); + unsigned R1 = MI->getOperand(1).getReg(); + *I = BuildMI(Opcode, 2, R0).addReg(R1).addZImm((char)Val); + delete MI; + return true; + } + } + return false; + #if 0 case X86::MOVir32: Size++; case X86::MOVir16: Size++; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 7c940ba43cc..f4774ce39c0 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -243,6 +243,8 @@ def ADDrr32 : I2A32<"add", 0x01, MRMDestReg>, Pattern<(set R32, (plus R def ADDri8 : I2A8 <"add", 0x80, MRMS0r >, Pattern<(set R8 , (plus R8 , imm))>; def ADDri16 : I2A16<"add", 0x81, MRMS0r >, OpSize, Pattern<(set R16, (plus R16, imm))>; def ADDri32 : I2A32<"add", 0x81, MRMS0r >, 
Pattern<(set R32, (plus R32, imm))>; +def ADDri16b : I2A8 <"add", 0x83, MRMS0r >, OpSize; // ADDri with sign extended 8 bit imm +def ADDri32b : I2A8 <"add", 0x83, MRMS0r >; def ADCrr32 : I2A32<"adc", 0x11, MRMDestReg>; // R32 += imm32+Carry @@ -252,6 +254,8 @@ def SUBrr32 : I2A32<"sub", 0x29, MRMDestReg>, Pattern<(set R32, (minus def SUBri8 : I2A8 <"sub", 0x80, MRMS5r >, Pattern<(set R8 , (minus R8 , imm))>; def SUBri16 : I2A16<"sub", 0x81, MRMS5r >, OpSize, Pattern<(set R16, (minus R16, imm))>; def SUBri32 : I2A32<"sub", 0x81, MRMS5r >, Pattern<(set R32, (minus R32, imm))>; +def SUBri16b : I2A8 <"sub", 0x83, MRMS5r >, OpSize; +def SUBri32b : I2A8 <"sub", 0x83, MRMS5r >; def SBBrr32 : I2A32<"sbb", 0x19, MRMDestReg>; // R32 -= R32+Carry @@ -259,6 +263,9 @@ def IMULrr16 : I2A16<"imul", 0xAF, MRMSrcReg>, TB, OpSize, Pattern<(set R16, (ti def IMULrr32 : I2A32<"imul", 0xAF, MRMSrcReg>, TB , Pattern<(set R32, (times R32, R32))>; def IMULri16 : I2A16<"imul", 0x69, MRMSrcReg>, OpSize; def IMULri32 : I2A32<"imul", 0x69, MRMSrcReg>; +def IMULri16b : I2A8<"imul", 0x6B, MRMSrcReg>, OpSize; +def IMULri32b : I2A8<"imul", 0x6B, MRMSrcReg>; + // Logical operators... 
def ANDrr8 : I2A8 <"and", 0x20, MRMDestReg>, Pattern<(set R8 , (and R8 , R8 ))>; @@ -267,6 +274,8 @@ def ANDrr32 : I2A32<"and", 0x21, MRMDestReg>, Pattern<(set R32, (and R3 def ANDri8 : I2A8 <"and", 0x80, MRMS4r >, Pattern<(set R8 , (and R8 , imm))>; def ANDri16 : I2A16<"and", 0x81, MRMS4r >, OpSize, Pattern<(set R16, (and R16, imm))>; def ANDri32 : I2A32<"and", 0x81, MRMS4r >, Pattern<(set R32, (and R32, imm))>; +def ANDri16b : I2A8 <"and", 0x83, MRMS4r >, OpSize; +def ANDri32b : I2A8 <"and", 0x83, MRMS4r >; def ORrr8 : I2A8 <"or" , 0x08, MRMDestReg>, Pattern<(set R8 , (or R8 , R8 ))>; def ORrr16 : I2A16<"or" , 0x09, MRMDestReg>, OpSize, Pattern<(set R16, (or R16, R16))>; @@ -274,6 +283,9 @@ def ORrr32 : I2A32<"or" , 0x09, MRMDestReg>, Pattern<(set R32, (or R3 def ORri8 : I2A8 <"or" , 0x80, MRMS1r >, Pattern<(set R8 , (or R8 , imm))>; def ORri16 : I2A16<"or" , 0x81, MRMS1r >, OpSize, Pattern<(set R16, (or R16, imm))>; def ORri32 : I2A32<"or" , 0x81, MRMS1r >, Pattern<(set R32, (or R32, imm))>; +def ORri16b : I2A8 <"or" , 0x83, MRMS1r >, OpSize; +def ORri32b : I2A8 <"or" , 0x83, MRMS1r >; + def XORrr8 : I2A8 <"xor", 0x30, MRMDestReg>, Pattern<(set R8 , (xor R8 , R8 ))>; def XORrr16 : I2A16<"xor", 0x31, MRMDestReg>, OpSize, Pattern<(set R16, (xor R16, R16))>; @@ -281,6 +293,8 @@ def XORrr32 : I2A32<"xor", 0x31, MRMDestReg>, Pattern<(set R32, (xor R3 def XORri8 : I2A8 <"xor", 0x80, MRMS6r >, Pattern<(set R8 , (xor R8 , imm))>; def XORri16 : I2A16<"xor", 0x81, MRMS6r >, OpSize, Pattern<(set R16, (xor R16, imm))>; def XORri32 : I2A32<"xor", 0x81, MRMS6r >, Pattern<(set R32, (xor R32, imm))>; +def XORri16b : I2A8 <"xor", 0x83, MRMS6r >, OpSize; +def XORri32b : I2A8 <"xor", 0x83, MRMS6r >; // Test instructions are just like AND, except they don't generate a result. 
def TESTrr8 : X86Inst<"test", 0x84, MRMDestReg, Arg8 >; // flags = R8 & R8 diff --git a/lib/Target/X86/X86PeepholeOpt.cpp b/lib/Target/X86/X86PeepholeOpt.cpp index 559baeeae9d..efb6cc3e6a3 100644 --- a/lib/Target/X86/X86PeepholeOpt.cpp +++ b/lib/Target/X86/X86PeepholeOpt.cpp @@ -51,6 +51,46 @@ bool PH::PeepholeOptimize(MachineBasicBlock &MBB, } return false; + // A large number of X86 instructions have forms which take an 8-bit + // immediate despite the fact that the operands are 16 or 32 bits. Because + // this can save three bytes of code size (and icache space), we want to + // shrink them if possible. + case X86::ADDri16: case X86::ADDri32: + case X86::SUBri16: case X86::SUBri32: + case X86::IMULri16: case X86::IMULri32: + case X86::ANDri16: case X86::ANDri32: + case X86::ORri16: case X86::ORri32: + case X86::XORri16: case X86::XORri32: + assert(MI->getNumOperands() == 3 && "These should all have 3 operands!"); + if (MI->getOperand(2).isImmediate()) { + int Val = MI->getOperand(2).getImmedValue(); + // If the value is the same when signed extended from 8 bits... 
+ if (Val == (signed int)(signed char)Val) { + unsigned Opcode; + switch (MI->getOpcode()) { + default: assert(0 && "Unknown opcode value!"); + case X86::ADDri16: Opcode = X86::ADDri16b; break; + case X86::ADDri32: Opcode = X86::ADDri32b; break; + case X86::SUBri16: Opcode = X86::SUBri16b; break; + case X86::SUBri32: Opcode = X86::SUBri32b; break; + case X86::IMULri16: Opcode = X86::IMULri16b; break; + case X86::IMULri32: Opcode = X86::IMULri32b; break; + case X86::ANDri16: Opcode = X86::ANDri16b; break; + case X86::ANDri32: Opcode = X86::ANDri32b; break; + case X86::ORri16: Opcode = X86::ORri16b; break; + case X86::ORri32: Opcode = X86::ORri32b; break; + case X86::XORri16: Opcode = X86::XORri16b; break; + case X86::XORri32: Opcode = X86::XORri32b; break; + } + unsigned R0 = MI->getOperand(0).getReg(); + unsigned R1 = MI->getOperand(1).getReg(); + *I = BuildMI(Opcode, 2, R0).addReg(R1).addZImm((char)Val); + delete MI; + return true; + } + } + return false; + #if 0 case X86::MOVir32: Size++; case X86::MOVir16: Size++;