diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index db9716e9444..dbd4f63619d 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -162,6 +162,22 @@ namespace ARMII {
     I_BitShift     = 25,
     CondShift      = 28
   };
+
+  /// Target Operand Flag enum.
+  enum TOF {
+    //===------------------------------------------------------------------===//
+    // ARM Specific MachineOperand flags.
+
+    MO_NO_FLAG,
+
+    /// MO_LO16 - On a symbol operand, this represents a relocation containing
+    /// lower 16 bit of the address. Used only via movw instruction.
+    MO_LO16,
+
+    /// MO_HI16 - On a symbol operand, this represents a relocation containing
+    /// higher 16 bit of the address. Used only via movt instruction.
+    MO_HI16
+  };
 }
 
 class ARMBaseInstrInfo : public TargetInstrInfoImpl {
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 4d0f8993e00..c929c54d489 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -75,17 +75,30 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
     }
     case ARM::t2MOVi32imm: {
       unsigned DstReg = MI.getOperand(0).getReg();
-      unsigned Imm = MI.getOperand(1).getImm();
-      unsigned Lo16 = Imm & 0xffff;
-      unsigned Hi16 = (Imm >> 16) & 0xffff;
       if (!MI.getOperand(0).isDead()) {
-        AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
-                               TII->get(ARM::t2MOVi16), DstReg)
-                       .addImm(Lo16));
-        AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
-                               TII->get(ARM::t2MOVTi16))
-                       .addReg(DstReg, getDefRegState(true))
-                       .addReg(DstReg).addImm(Hi16));
+        const MachineOperand &MO = MI.getOperand(1);
+        MachineInstrBuilder LO16, HI16;
+
+        LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVi16),
+                       DstReg);
+        HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVTi16))
+          .addReg(DstReg, getDefRegState(true)).addReg(DstReg);
+
+        if (MO.isImm()) {
+          unsigned Imm = MO.getImm();
+          unsigned Lo16 = Imm & 0xffff;
+          unsigned Hi16 = (Imm >> 16) & 0xffff;
+          LO16 = LO16.addImm(Lo16);
+          HI16 = HI16.addImm(Hi16);
+        } else {
+          GlobalValue *GV = MO.getGlobal();
+          unsigned TF = MO.getTargetFlags();
+          LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
+          HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
+          // FIXME: What's about memoperands?
+        }
+        AddDefaultPred(LO16);
+        AddDefaultPred(HI16);
       }
       MI.eraseFromParent();
       Modified = true;
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index c7a50ed0e59..d63f3e66fa4 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -261,7 +261,9 @@ bool ARMDAGToDAGISel::SelectAddrMode2(SDValue Op, SDValue N,
     if (N.getOpcode() == ISD::FrameIndex) {
       int FI = cast<FrameIndexSDNode>(N)->getIndex();
       Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
-    } else if (N.getOpcode() == ARMISD::Wrapper) {
+    } else if (N.getOpcode() == ARMISD::Wrapper &&
+               !(Subtarget->useMovt() &&
+                 N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
       Base = N.getOperand(0);
     }
     Offset = CurDAG->getRegister(0, MVT::i32);
@@ -463,7 +465,9 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue Op, SDValue N,
     if (N.getOpcode() == ISD::FrameIndex) {
       int FI = cast<FrameIndexSDNode>(N)->getIndex();
       Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
-    } else if (N.getOpcode() == ARMISD::Wrapper) {
+    } else if (N.getOpcode() == ARMISD::Wrapper &&
+               !(Subtarget->useMovt() &&
+                 N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
       Base = N.getOperand(0);
     }
     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
@@ -558,7 +562,13 @@ ARMDAGToDAGISel::SelectThumbAddrModeRI5(SDValue Op, SDValue N,
   }
 
   if (N.getOpcode() != ISD::ADD) {
-    Base = (N.getOpcode() == ARMISD::Wrapper) ? N.getOperand(0) : N;
+    if (N.getOpcode() == ARMISD::Wrapper &&
+        !(Subtarget->useMovt() &&
+          N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
+      Base = N.getOperand(0);
+    } else
+      Base = N;
+
     Offset = CurDAG->getRegister(0, MVT::i32);
     OffImm = CurDAG->getTargetConstant(0, MVT::i32);
     return true;
@@ -681,7 +691,9 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue Op, SDValue N,
       Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
       OffImm  = CurDAG->getTargetConstant(0, MVT::i32);
       return true;
-    } else if (N.getOpcode() == ARMISD::Wrapper) {
+    } else if (N.getOpcode() == ARMISD::Wrapper &&
+               !(Subtarget->useMovt() &&
+                 N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
       Base = N.getOperand(0);
       if (Base.getOpcode() == ISD::TargetConstantPool)
         return false;  // We want to select t2LDRpci instead.
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 1549b637f95..c839fc65606 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -39,6 +39,7 @@
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/ADT/VectorExtras.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include <sstream>
@@ -1356,10 +1357,17 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
                            PseudoSourceValue::getGOT(), 0);
     return Result;
   } else {
-    SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
-    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
-    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
-                       PseudoSourceValue::getConstantPool(), 0);
+    // If we have T2 ops, we can materialize the address directly via movt/movw
+    // pair. This is always cheaper.
+    if (Subtarget->useMovt()) {
+      return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
+                         DAG.getTargetGlobalAddress(GV, PtrVT));
+    } else {
+      SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
+      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+      return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+                         PseudoSourceValue::getConstantPool(), 0);
+    }
   }
 }
 
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 92972f1e3a9..0a8ecc008ca 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -116,6 +116,10 @@ def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
 def CarryDefIsUnused : Predicate<"!N.getNode()->hasAnyUseOfValue(1)">;
 def CarryDefIsUsed   : Predicate<"N.getNode()->hasAnyUseOfValue(1)">;
 
+// FIXME: Eventually this will be just "hasV6T2Ops".
+def UseMovt   : Predicate<"Subtarget->useMovt()">;
+def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
+
 //===----------------------------------------------------------------------===//
 // ARM Flag Definitions.
 
@@ -204,7 +208,7 @@ def hi16 : SDNodeXForm<imm, [{
 def lo16AllZero : PatLeaf<(i32 imm), [{
   // Returns true if all low 16-bits are 0.
   return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0;
-  }], hi16>;
+}], hi16>;
 
 /// imm0_65535 predicate - True if the 32-bit immediate is in the range 
 /// [0.65535].
@@ -1002,7 +1006,7 @@ def MOVi16 : AI1<0b1000, (outs GPR:$dst), (ins i32imm:$src),
 let Constraints = "$src = $dst" in
 def MOVTi16 : AI1<0b1010, (outs GPR:$dst), (ins GPR:$src, i32imm:$imm),
                   DPFrm, IIC_iMOVi,
-                  "movt", "\t$dst, $imm", 
+                  "movt", "\t$dst, $imm",
                   [(set GPR:$dst,
                         (or (and GPR:$src, 0xffff), 
                             lo16AllZero:$imm))]>, UnaryDP,
@@ -1603,12 +1607,6 @@ let Defs =
 // Non-Instruction Patterns
 //
 
-// ConstantPool, GlobalAddress, and JumpTable
-def : ARMPat<(ARMWrapper  tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>;
-def : ARMPat<(ARMWrapper  tconstpool  :$dst), (LEApcrel tconstpool  :$dst)>;
-def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
-             (LEApcrelJT tjumptable:$dst, imm:$id)>;
-
 // Large immediate handling.
 
 // Two piece so_imms.
@@ -1638,10 +1636,19 @@ def : ARMPat<(add GPR:$LHS, so_neg_imm2part:$RHS),
 // FIXME: Remove this when we can do generalized remat.
 let isReMaterializable = 1 in
 def MOVi32imm : AI1x2<(outs GPR:$dst), (ins i32imm:$src), Pseudo, IIC_iMOVi,
-                    "movw", "\t$dst, ${src:lo16}\n\tmovt${p} $dst, ${src:hi16}",
+                    "movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}",
                      [(set GPR:$dst, (i32 imm:$src))]>,
                Requires<[IsARM, HasV6T2]>;
 
+// ConstantPool, GlobalAddress, and JumpTable
+def : ARMPat<(ARMWrapper  tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>,
+            Requires<[IsARM, DontUseMovt]>;
+def : ARMPat<(ARMWrapper  tconstpool  :$dst), (LEApcrel tconstpool  :$dst)>;
+def : ARMPat<(ARMWrapper  tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>,
+            Requires<[IsARM, UseMovt]>;
+def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+             (LEApcrelJT tjumptable:$dst, imm:$id)>;
+
 // TODO: add,sub,and, 3-instr forms?
 
 
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index c46e30416c6..94898152992 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -1181,12 +1181,6 @@ def : T2Pat<(add GPR:$LHS, t2_so_neg_imm2part:$RHS),
              (t2SUBri (t2SUBri GPR:$LHS, (t2_so_neg_imm2part_1 imm:$RHS)),
                     (t2_so_neg_imm2part_2 imm:$RHS))>;
 
-// ConstantPool, GlobalAddress, and JumpTable
-def : T2Pat<(ARMWrapper  tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>;
-def : T2Pat<(ARMWrapper  tconstpool  :$dst), (t2LEApcrel tconstpool  :$dst)>;
-def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
-            (t2LEApcrelJT tjumptable:$dst, imm:$id)>;
-
 // 32-bit immediate using movw + movt.
 // This is a single pseudo instruction to make it re-materializable. Remove
 // when we can do generalized remat.
@@ -1195,6 +1189,16 @@ def t2MOVi32imm : T2Ix2<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi,
                    "movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}",
                      [(set GPR:$dst, (i32 imm:$src))]>;
 
+// ConstantPool, GlobalAddress, and JumpTable
+def : T2Pat<(ARMWrapper  tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>,
+           Requires<[IsThumb2, DontUseMovt]>;
+def : T2Pat<(ARMWrapper  tconstpool  :$dst), (t2LEApcrel tconstpool  :$dst)>;
+def : T2Pat<(ARMWrapper  tglobaladdr :$dst), (t2MOVi32imm tglobaladdr :$dst)>,
+           Requires<[IsThumb2, UseMovt]>;
+
+def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+            (t2LEApcrelJT tjumptable:$dst, imm:$id)>;
+
 // Pseudo instruction that combines ldr from constpool and add pc. This should
 // be expanded into two instructions late to allow if-conversion and
 // scheduling.
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 432ed78c19a..d6b072b6c27 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -27,6 +27,10 @@ UseNEONFP("arm-use-neon-fp",
           cl::desc("Use NEON for single-precision FP"),
           cl::init(false), cl::Hidden);
 
+static cl::opt<bool>
+UseMOVT("arm-use-movt",
+        cl::init(true), cl::Hidden);
+
 ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
                            bool isT)
   : ARMArchVersion(V4T)
@@ -36,6 +40,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
   , ThumbMode(Thumb1)
   , PostRAScheduler(false)
   , IsR9Reserved(ReserveR9)
+  , UseMovt(UseMOVT)
   , stackAlignment(4)
   , CPUString("generic")
   , TargetType(isELF) // Default to ELF unless otherwise specified.
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 3d0e01e99b7..b2467b073b5 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -65,6 +65,10 @@ protected:
   /// IsR9Reserved - True if R9 is a not available as general purpose register.
   bool IsR9Reserved;
 
+  /// UseMovt - True if MOVT / MOVW pairs are used for materialization of 32-bit
+  /// imms (including global addresses).
+  bool UseMovt;
+
   /// stackAlignment - The minimum alignment known to hold of the stack frame on
   /// entry to the function and which must be maintained by every function.
   unsigned stackAlignment;
@@ -130,8 +134,10 @@ protected:
 
   bool isR9Reserved() const { return IsR9Reserved; }
 
+  bool useMovt() const { return UseMovt && hasV6T2Ops(); }
+
   const std::string & getCPUString() const { return CPUString; }
-  
+
   /// enablePostRAScheduler - True at 'More' optimization.
   bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
                              TargetSubtarget::AntiDepBreakMode& Mode,
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index daffc5eb922..692bb192426 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -330,6 +330,8 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
 void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
                                  const char *Modifier) {
   const MachineOperand &MO = MI->getOperand(OpNum);
+  unsigned TF = MO.getTargetFlags();
+
   switch (MO.getType()) {
   default:
     assert(0 && "<unknown operand type>");
@@ -356,12 +358,12 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
   case MachineOperand::MO_Immediate: {
     int64_t Imm = MO.getImm();
     O << '#';
-    if (Modifier) {
-      if (strcmp(Modifier, "lo16") == 0)
-        O << ":lower16:";
-      else if (strcmp(Modifier, "hi16") == 0)
-        O << ":upper16:";
-    }
+    if ((Modifier && strcmp(Modifier, "lo16") == 0) ||
+        (TF & ARMII::MO_LO16))
+      O << ":lower16:";
+    else if ((Modifier && strcmp(Modifier, "hi16") == 0) ||
+             (TF & ARMII::MO_HI16))
+      O << ":upper16:";
     O << Imm;
     break;
   }
@@ -371,6 +373,13 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
   case MachineOperand::MO_GlobalAddress: {
     bool isCallOp = Modifier && !strcmp(Modifier, "call");
     GlobalValue *GV = MO.getGlobal();
+
+    if ((Modifier && strcmp(Modifier, "lo16") == 0) ||
+        (TF & ARMII::MO_LO16))
+      O << ":lower16:";
+    else if ((Modifier && strcmp(Modifier, "hi16") == 0) ||
+             (TF & ARMII::MO_HI16))
+      O << ":upper16:";
     O << Mang->getMangledName(GV);
 
     printOffset(MO.getOffset());
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index ad1739c6905..b2fd7b334d8 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -78,7 +78,7 @@ namespace {
     { ARM::t2LSRri, ARM::tLSRri,  0,             5,   0,    1,   0,  0,0, 0 },
     { ARM::t2LSRrr, 0,            ARM::tLSRrr,   0,   0,    0,   1,  0,0, 0 },
     { ARM::t2MOVi,  ARM::tMOVi8,  0,             8,   0,    1,   0,  0,0, 0 },
-    { ARM::t2MOVi16,ARM::tMOVi8,  0,             8,   0,    1,   0,  0,0, 0 },
+    { ARM::t2MOVi16,ARM::tMOVi8,  0,             8,   0,    1,   0,  0,0, 1 },
     // FIXME: Do we need the 16-bit 'S' variant?
     { ARM::t2MOVr,ARM::tMOVgpr2gpr,0,            0,   0,    0,   0,  1,0, 0 },
     { ARM::t2MOVCCr,0,            ARM::tMOVCCr,  0,   0,    0,   0,  0,1, 0 },
@@ -413,6 +413,12 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
     if (MI->getOperand(2).getImm() == 0)
       return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
     break;
+  case ARM::t2MOVi16:
+    // Can convert only 'pure' immediate operands, not immediates obtained as
+    // globals' addresses.
+    if (MI->getOperand(1).isImm())
+      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
+    break;
   }
   return false;
 }
diff --git a/test/CodeGen/ARM/movt-movw-global.ll b/test/CodeGen/ARM/movt-movw-global.ll
new file mode 100644
index 00000000000..886ff3fea7a
--- /dev/null
+++ b/test/CodeGen/ARM/movt-movw-global.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "armv7-eabi"
+
+@foo = common global i32 0                        ; <i32*> [#uses=1]
+
+define arm_aapcs_vfpcc i32* @bar1() nounwind readnone {
+entry:
+; CHECK:      movw    r0, :lower16:foo
+; CHECK-NEXT: movt    r0, :upper16:foo
+  ret i32* @foo
+}
+
+define arm_aapcs_vfpcc void @bar2(i32 %baz) nounwind {
+entry:
+; CHECK:      movw    r1, :lower16:foo
+; CHECK-NEXT: movt    r1, :upper16:foo
+  store i32 %baz, i32* @foo, align 4
+  ret void
+}