From 2d524b0765145f1c7888166c985a25452f16b2bc Mon Sep 17 00:00:00 2001
From: Kevin Enderby <enderby@apple.com>
Date: Thu, 3 May 2012 22:41:56 +0000
Subject: [PATCH] Fix issues with the ARM bl and blx thumb instructions and the
 J1 and J2 bits for the assembler and disassembler.  Which were not being
 set/read correctly for offsets greater than 22 bits in some cases.

Changes to lib/Target/ARM/ARMAsmBackend.cpp from Gideon Myles!


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@156118 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMInstrThumb.td               | 15 ++--
 .../ARM/Disassembler/ARMDisassembler.cpp      | 38 +++++++-
 lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 88 ++++++++++++-------
 test/MC/ARM/basic-thumb-instructions.s        |  4 +-
 test/MC/Disassembler/ARM/thumb-tests.txt      |  8 ++
 test/MC/MachO/ARM/thumb-bl-jbits.s            | 19 ++++
 6 files changed, 128 insertions(+), 44 deletions(-)
 create mode 100644 test/MC/MachO/ARM/thumb-bl-jbits.s

diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index fe553c21502..ce6785a2981 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -427,11 +427,11 @@ let isCall = 1,
                   "bl${p}\t$func",
                   [(ARMtcall tglobaladdr:$func)]>,
              Requires<[IsThumb]> {
-    bits<22> func;
-    let Inst{26} = func{21};
+    bits<24> func;
+    let Inst{26} = func{23};
     let Inst{25-16} = func{20-11};
-    let Inst{13} = 1;
-    let Inst{11} = 1;
+    let Inst{13} = func{22};
+    let Inst{11} = func{21};
     let Inst{10-0} = func{10-0};
   }
 
@@ -441,10 +441,11 @@ let isCall = 1,
                    "blx${p}\t$func",
                    [(ARMcall tglobaladdr:$func)]>,
               Requires<[IsThumb, HasV5T]> {
-    bits<21> func;
+    bits<24> func;
+    let Inst{26} = func{23};
     let Inst{25-16} = func{20-11};
-    let Inst{13} = 1;
-    let Inst{11} = 1;
+    let Inst{13} = func{22};
+    let Inst{11} = func{21};
     let Inst{10-1} = func{10-1};
     let Inst{0} = 0; // func{0} is assumed zero
   }
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 28ea0d256ed..80b2b235bed 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -3339,10 +3339,25 @@ static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn,
 
 static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Val,
                                  uint64_t Address, const void *Decoder) {
+  // Val is passed in as S:J1:J2:imm10H:imm10L:’0’
+  // Note only one trailing zero not two.  Also the J1 and J2 values are from
+  // the encoded instruction.  So here change to I1 and I2 values via:
+  // I1 = NOT(J1 EOR S);
+  // I2 = NOT(J2 EOR S);
+  // and build the imm32 with two trailing zeros as documented:
+  // imm32 = SignExtend(S:I1:I2:imm10H:imm10L:’00’, 32);
+  unsigned S = (Val >> 23) & 1;
+  unsigned J1 = (Val >> 22) & 1;
+  unsigned J2 = (Val >> 21) & 1;
+  unsigned I1 = !(J1 ^ S);
+  unsigned I2 = !(J2 ^ S);
+  unsigned tmp = (Val & ~0x600000) | (I1 << 22) | (I2 << 21);
+  int imm32 = SignExtend32<25>(tmp << 1);
+
   if (!tryAddingSymbolicOperand(Address,
-                                (Address & ~2u) + SignExtend32<22>(Val << 1) + 4,
+                                (Address & ~2u) + imm32 + 4,
                                 true, 4, Inst, Decoder))
-    Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1)));
+    Inst.addOperand(MCOperand::CreateImm(imm32));
   return MCDisassembler::Success;
 }
 
@@ -3456,9 +3471,24 @@ DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val,
 
 static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val,
                                        uint64_t Address, const void *Decoder){
-  if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<22>(Val<<1) + 4,
+  // Val is passed in as S:J1:J2:imm10:imm11
+  // Note no trailing zero after imm11.  Also the J1 and J2 values are from
+  // the encoded instruction.  So here change to I1 and I2 values via:
+  // I1 = NOT(J1 EOR S);
+  // I2 = NOT(J2 EOR S);
+  // and build the imm32 with one trailing zero as documented:
+  // imm32 = SignExtend(S:I1:I2:imm10:imm11:’0’, 32);
+  unsigned S = (Val >> 23) & 1;
+  unsigned J1 = (Val >> 22) & 1;
+  unsigned J2 = (Val >> 21) & 1;
+  unsigned I1 = !(J1 ^ S);
+  unsigned I2 = !(J2 ^ S);
+  unsigned tmp = (Val & ~0x600000) | (I1 << 22) | (I2 << 21);
+  int imm32 = SignExtend32<25>(tmp << 1);
+
+  if (!tryAddingSymbolicOperand(Address, Address + imm32 + 4,
                                 true, 4, Inst, Decoder))
-    Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1)));
+    Inst.addOperand(MCOperand::CreateImm(imm32));
   return MCDisassembler::Success;
 }
 
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 95506775af4..ac6ce642dfa 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -394,39 +394,65 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
     return swapped;
   }
   case ARM::fixup_arm_thumb_bl: {
-    // The value doesn't encode the low bit (always zero) and is offset by
-    // four. The value is encoded into disjoint bit positions in the destination
-    // opcode. x = unchanged, I = immediate value bit, S = sign extension bit
-    //
-    //   BL:  xxxxxSIIIIIIIIII xxxxxIIIIIIIIIII
-    //
-    // Note that the halfwords are stored high first, low second; so we need
-    // to transpose the fixup value here to map properly.
-    unsigned isNeg = (int64_t(Value - 4) < 0) ? 1 : 0;
-    uint32_t Binary = 0;
-    Value = 0x3fffff & ((Value - 4) >> 1);
-    Binary  = (Value & 0x7ff) << 16;    // Low imm11 value.
-    Binary |= (Value & 0x1ffc00) >> 11; // High imm10 value.
-    Binary |= isNeg << 10;              // Sign bit.
-    return Binary;
+     // The value doesn't encode the low bit (always zero) and is offset by
+     // four. The 32-bit immediate value is encoded as
+     //   imm32 = SignExtend(S:I1:I2:imm10:imm11:0)
+     // where I1 = NOT(J1 ^ S) and I2 = NOT(J2 ^ S).
+     // The value is encoded into disjoint bit positions in the destination
+     // opcode. x = unchanged, I = immediate value bit, S = sign extension bit,
+     // J = either J1 or J2 bit
+     //
+     //   BL:  xxxxxSIIIIIIIIII xxJxJIIIIIIIIIII
+     //
+     // Note that the halfwords are stored high first, low second; so we need
+     // to transpose the fixup value here to map properly.
+     uint32_t offset = (Value - 4) >> 1;
+     uint32_t signBit = (offset & 0x800000) >> 23;
+     uint32_t I1Bit = (offset & 0x400000) >> 22;
+     uint32_t J1Bit = (I1Bit ^ 0x1) ^ signBit;
+     uint32_t I2Bit = (offset & 0x200000) >> 21;
+     uint32_t J2Bit = (I2Bit ^ 0x1) ^ signBit;
+     uint32_t imm10Bits = (offset & 0x1FF800) >> 11;
+     uint32_t imm11Bits = (offset & 0x000007FF);
+ 
+     uint32_t Binary = 0;
+     uint32_t firstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10Bits);
+     uint32_t secondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) |
+                           (uint16_t)imm11Bits);
+     Binary |= secondHalf << 16;
+     Binary |= firstHalf;
+     return Binary;
+
   }
   case ARM::fixup_arm_thumb_blx: {
-    // The value doesn't encode the low two bits (always zero) and is offset by
-    // four (see fixup_arm_thumb_cp). The value is encoded into disjoint bit
-    // positions in the destination opcode. x = unchanged, I = immediate value
-    // bit, S = sign extension bit, 0 = zero.
-    //
-    //   BLX: xxxxxSIIIIIIIIII xxxxxIIIIIIIIII0
-    //
-    // Note that the halfwords are stored high first, low second; so we need
-    // to transpose the fixup value here to map properly.
-    unsigned isNeg = (int64_t(Value-4) < 0) ? 1 : 0;
-    uint32_t Binary = 0;
-    Value = 0xfffff & ((Value - 2) >> 2);
-    Binary  = (Value & 0x3ff) << 17;    // Low imm10L value.
-    Binary |= (Value & 0xffc00) >> 10;  // High imm10H value.
-    Binary |= isNeg << 10;              // Sign bit.
-    return Binary;
+     // The value doesn't encode the low two bits (always zero) and is offset by
+     // four (see fixup_arm_thumb_cp). The 32-bit immediate value is encoded as
+     //   imm32 = SignExtend(S:I1:I2:imm10H:imm10L:00)
+     // where I1 = NOT(J1 ^ S) and I2 = NOT(J2 ^ S).
+     // The value is encoded into disjoint bit positions in the destination 
+     // opcode. x = unchanged, I = immediate value bit, S = sign extension bit, 
+     // J = either J1 or J2 bit, 0 = zero.
+     //
+     //   BLX: xxxxxSIIIIIIIIII xxJxJIIIIIIIIII0
+     //
+     // Note that the halfwords are stored high first, low second; so we need
+     // to transpose the fixup value here to map properly.
+     uint32_t offset = (Value - 2) >> 2;
+     uint32_t signBit = (offset & 0x400000) >> 22;
+     uint32_t I1Bit = (offset & 0x200000) >> 21;
+     uint32_t J1Bit = (I1Bit ^ 0x1) ^ signBit;
+     uint32_t I2Bit = (offset & 0x100000) >> 20;
+     uint32_t J2Bit = (I2Bit ^ 0x1) ^ signBit;
+     uint32_t imm10HBits = (offset & 0xFFC00) >> 10;
+     uint32_t imm10LBits = (offset & 0x3FF);
+ 
+     uint32_t Binary = 0;
+     uint32_t firstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10HBits);
+     uint32_t secondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) | 
+                           ((uint16_t)imm10LBits) << 1);
+     Binary |= secondHalf << 16;
+     Binary |= firstHalf;
+     return Binary;
   }
   case ARM::fixup_arm_thumb_cp:
     // Offset by 4, and don't encode the low two bits. Two bytes of that
diff --git a/test/MC/ARM/basic-thumb-instructions.s b/test/MC/ARM/basic-thumb-instructions.s
index 231b3338e4e..4ee34ce6b4c 100644
--- a/test/MC/ARM/basic-thumb-instructions.s
+++ b/test/MC/ARM/basic-thumb-instructions.s
@@ -169,9 +169,9 @@ _func:
         bl _bar
         blx _baz
 
-@ CHECK: bl	_bar                    @ encoding: [A,0xf0'A',A,0xf8'A']
+@ CHECK: bl	_bar                    @ encoding: [A,0xf0'A',A,0xd0'A']
              @   fixup A - offset: 0, value: _bar, kind: fixup_arm_thumb_bl
-@ CHECK: blx	_baz                    @ encoding: [A,0xf0'A',A,0xe8'A']
+@ CHECK: blx	_baz                    @ encoding: [A,0xf0'A',A,0xc0'A']
              @   fixup A - offset: 0, value: _baz, kind: fixup_arm_thumb_blx
 
 
diff --git a/test/MC/Disassembler/ARM/thumb-tests.txt b/test/MC/Disassembler/ARM/thumb-tests.txt
index e17891d5a96..c08585a3719 100644
--- a/test/MC/Disassembler/ARM/thumb-tests.txt
+++ b/test/MC/Disassembler/ARM/thumb-tests.txt
@@ -301,3 +301,11 @@
 
 # CHECK: mrs    r0, apsr
 0xef 0xf3 0x00 0x80
+
+# rdar://11313994
+# CHECK: blx	#2313244
+0x34 0xf2 0x0e 0xee
+
+# rdar://11324693
+# CHECK: bl	#-12303196
+0x44 0xf4 0x52 0xda
diff --git a/test/MC/MachO/ARM/thumb-bl-jbits.s b/test/MC/MachO/ARM/thumb-bl-jbits.s
new file mode 100644
index 00000000000..9657968db5e
--- /dev/null
+++ b/test/MC/MachO/ARM/thumb-bl-jbits.s
@@ -0,0 +1,19 @@
+@ RUN: llvm-mc -triple=thumbv7-apple-darwin -filetype=obj -o - < %s | macho-dump --dump-section-data | FileCheck %s
+.thumb
+.thumb_func t
+t:	nop
+
+.data
+.space 4441096 - 4 - 2
+
+.section __TEXT, __branch, regular, pure_instructions
+.thumb
+.thumb_func b
+b:
+	bl	t
+# CHECK: '_section_data', 'c3f7fcf5'
+# We are checking that the branch and link instruction which is:
+#	bl	#-4441096
+# has it displacement encoded correctly with respect to the J1 and J2 bits when
+# the branch is assembled with a label not a displacement.
+# rdar://10149689