From e4f506ff4ba8ddc70b6b7c77feceabb0b53b6ccf Mon Sep 17 00:00:00 2001
From: Rafael Espindola <rafael.espindola@gmail.com>
Date: Tue, 26 Oct 2010 14:09:12 +0000
Subject: [PATCH] Implement some relaxations for arithmetic instructions. The
 limitation on RIP relative relocations looks artificial, but this is a
 superset of what we were able to do before.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@117364 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86AsmBackend.cpp | 104 +++++++++++++++++++++++++++----
 test/MC/ELF/relax-arith.s        |  75 ++++++++++++++++++++++
 2 files changed, 167 insertions(+), 12 deletions(-)
 create mode 100644 test/MC/ELF/relax-arith.s

diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp
index be5671919dc..687971d114f 100644
--- a/lib/Target/X86/X86AsmBackend.cpp
+++ b/lib/Target/X86/X86AsmBackend.cpp
@@ -69,16 +69,11 @@ public:
 };
 } // end anonymous namespace
 
-static unsigned getRelaxedOpcode(unsigned Op) {
+static unsigned getRelaxedOpcodeBranch(unsigned Op) {
   switch (Op) {
   default:
     return Op;
 
-  // This is used on i386 with things like addl $foo, %ebx
-  // FIXME: Should the other *i8 instructions be here too? If not, it might
-  // be better to just select X86::ADD32ri instead of X86::ADD32ri8.
-  case X86::ADD32ri8: return X86::ADD32ri;
-
   case X86::JAE_1: return X86::JAE_4;
   case X86::JA_1:  return X86::JA_4;
   case X86::JBE_1: return X86::JBE_4;
@@ -99,16 +94,101 @@ static unsigned getRelaxedOpcode(unsigned Op) {
   }
 }
 
+static unsigned getRelaxedOpcodeArith(unsigned Op) {
+  switch (Op) {
+  default:
+    return Op;
+
+    // IMUL
+  case X86::IMUL16rri8: return X86::IMUL16rri;
+  case X86::IMUL16rmi8: return X86::IMUL16rmi;
+  case X86::IMUL32rri8: return X86::IMUL32rri;
+  case X86::IMUL32rmi8: return X86::IMUL32rmi;
+  case X86::IMUL64rri8: return X86::IMUL64rri32;
+  case X86::IMUL64rmi8: return X86::IMUL64rmi32;
+
+    // AND
+  case X86::AND16ri8: return X86::AND16ri;
+  case X86::AND16mi8: return X86::AND16mi;
+  case X86::AND32ri8: return X86::AND32ri;
+  case X86::AND32mi8: return X86::AND32mi;
+  case X86::AND64ri8: return X86::AND64ri32;
+  case X86::AND64mi8: return X86::AND64mi32;
+
+    // OR
+  case X86::OR16ri8: return X86::OR16ri;
+  case X86::OR16mi8: return X86::OR16mi;
+  case X86::OR32ri8: return X86::OR32ri;
+  case X86::OR32mi8: return X86::OR32mi;
+  case X86::OR64ri8: return X86::OR64ri32;
+  case X86::OR64mi8: return X86::OR64mi32;
+
+    // XOR
+  case X86::XOR16ri8: return X86::XOR16ri;
+  case X86::XOR16mi8: return X86::XOR16mi;
+  case X86::XOR32ri8: return X86::XOR32ri;
+  case X86::XOR32mi8: return X86::XOR32mi;
+  case X86::XOR64ri8: return X86::XOR64ri32;
+  case X86::XOR64mi8: return X86::XOR64mi32;
+
+    // ADD
+  case X86::ADD16ri8: return X86::ADD16ri;
+  case X86::ADD16mi8: return X86::ADD16mi;
+  case X86::ADD32ri8: return X86::ADD32ri;
+  case X86::ADD32mi8: return X86::ADD32mi;
+  case X86::ADD64ri8: return X86::ADD64ri32;
+  case X86::ADD64mi8: return X86::ADD64mi32;
+
+    // SUB
+  case X86::SUB16ri8: return X86::SUB16ri;
+  case X86::SUB16mi8: return X86::SUB16mi;
+  case X86::SUB32ri8: return X86::SUB32ri;
+  case X86::SUB32mi8: return X86::SUB32mi;
+  case X86::SUB64ri8: return X86::SUB64ri32;
+  case X86::SUB64mi8: return X86::SUB64mi32;
+
+    // CMP
+  case X86::CMP16ri8: return X86::CMP16ri;
+  case X86::CMP16mi8: return X86::CMP16mi;
+  case X86::CMP32ri8: return X86::CMP32ri;
+  case X86::CMP32mi8: return X86::CMP32mi;
+  case X86::CMP64ri8: return X86::CMP64ri32;
+  case X86::CMP64mi8: return X86::CMP64mi32;
+  }
+}
+
+static unsigned getRelaxedOpcode(unsigned Op) {
+  unsigned R = getRelaxedOpcodeArith(Op);
+  if (R != Op)
+    return R;
+  return getRelaxedOpcodeBranch(Op);
+}
+
 bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
+  // Branches can always be relaxed.
+  if (getRelaxedOpcodeBranch(Inst.getOpcode()) != Inst.getOpcode())
+    return true;
+
   // Check if this instruction is ever relaxable.
-  if (getRelaxedOpcode(Inst.getOpcode()) == Inst.getOpcode())
+  if (getRelaxedOpcodeArith(Inst.getOpcode()) == Inst.getOpcode())
     return false;
 
-  // If so, just assume it can be relaxed. Once we support relaxing more complex
-  // instructions we should check that the instruction actually has symbolic
-  // operands before doing this, but we need to be careful about things like
-  // PCrel.
-  return true;
+
+  // Check if it has an expression and is not RIP relative.
+  bool hasExp = false;
+  bool hasRIP = false;
+  for (unsigned i = 0; i < Inst.getNumOperands(); ++i) {
+    const MCOperand &Op = Inst.getOperand(i);
+    if (Op.isExpr())
+      hasExp = true;
+
+    if (Op.isReg() && Op.getReg() == X86::RIP)
+      hasRIP = true;
+  }
+
+  // FIXME: Why exactly do we need the !hasRIP? Is it just a limitation on
+  // how we do relaxations?
+  return hasExp && !hasRIP;
 }
 
 // FIXME: Can tblgen help at all here to verify there aren't other instructions
diff --git a/test/MC/ELF/relax-arith.s b/test/MC/ELF/relax-arith.s
new file mode 100644
index 00000000000..3236b41e532
--- /dev/null
+++ b/test/MC/ELF/relax-arith.s
@@ -0,0 +1,75 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump  --dump-section-data | FileCheck  %s
+
+// Test that we correctly relax these instructions into versions that use
+// 16 or 32 bit immediate values.
+
+bar:
+// CHECK: 'imul'
+// CHECK: ('_section_data', '6669db00 0066691c 25000000 00000069 db000000 00691c25 00000000 00000000 4869db00 00000048 691c2500 00000000 000000')
+        .section imul
+        imul $foo, %bx,  %bx
+        imul $foo, bar,  %bx
+        imul $foo, %ebx, %ebx
+        imul $foo, bar,  %ebx
+        imul $foo, %rbx, %rbx
+        imul $foo, bar,  %rbx
+
+// CHECK: and'
+// CHECK:('_section_data', '6681e300 00668124 25000000 00000081 e3000000 00812425 00000000 00000000 4881e300 00000048 81242500 00000000 000000')
+        .section and
+        and  $foo, %bx
+        andw $foo, bar
+        and  $foo, %ebx
+        andl $foo, bar
+        and  $foo, %rbx
+        andq $foo, bar
+
+// CHECK: 'or'
+// CHECK: ('_section_data', '6681cb00 0066810c 25000000 00000081 cb000000 00810c25 00000000 00000000 4881cb00 00000048 810c2500 00000000 000000')
+        .section or
+        or  $foo, %bx
+        orw $foo, bar
+        or  $foo, %ebx
+        orl $foo, bar
+        or  $foo, %rbx
+        orq $foo, bar
+
+// CHECK: 'xor'
+// CHECK: ('_section_data', '6681f300 00668134 25000000 00000081 f3000000 00813425 00000000 00000000 4881f300 00000048 81342500 00000000 000000')
+        .section xor
+        xor  $foo, %bx
+        xorw $foo, bar
+        xor  $foo, %ebx
+        xorl $foo, bar
+        xor  $foo, %rbx
+        xorq $foo, bar
+
+// CHECK: 'add'
+// CHECK: ('_section_data', '6681c300 00668104 25000000 00000081 c3000000 00810425 00000000 00000000 4881c300 00000048 81042500 00000000 000000')
+        .section add
+        add  $foo, %bx
+        addw $foo, bar
+        add  $foo, %ebx
+        addl $foo, bar
+        add  $foo, %rbx
+        addq $foo, bar
+
+// CHECK: 'sub'
+// CHECK: ('_section_data', '6681eb00 0066812c 25000000 00000081 eb000000 00812c25 00000000 00000000 4881eb00 00000048 812c2500 00000000 000000')
+        .section sub
+        sub  $foo, %bx
+        subw $foo, bar
+        sub  $foo, %ebx
+        subl $foo, bar
+        sub  $foo, %rbx
+        subq $foo, bar
+
+// CHECK: 'cmp'
+// CHECK: ('_section_data', '6681fb00 0066813c 25000000 00000081 fb000000 00813c25 00000000 00000000 4881fb00 00000048 813c2500 00000000 000000')
+        .section cmp
+        cmp  $foo, %bx
+        cmpw $foo, bar
+        cmp  $foo, %ebx
+        cmpl $foo, bar
+        cmp  $foo, %rbx
+        cmpq $foo, bar