diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 12d7b2ee212..0484fd01d5c 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -2691,13 +2691,20 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
   EVT VT = Op.getValueType();
   DebugLoc dl = Op.getDebugLoc();
-  assert((VT == MVT::i8 || VT == MVT::i16) &&
-         "unexpected type for custom-lowering vector extract");
   SDValue Vec = Op.getOperand(0);
   SDValue Lane = Op.getOperand(1);
+
+  // FIXME: This is invalid for 8 and 16-bit elements - the information about
+  // sign / zero extension is lost!
   Op = DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
   Op = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Op, DAG.getValueType(VT));
-  return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
+
+  if (VT.bitsLT(MVT::i32))
+    Op = DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
+  else if (VT.bitsGT(MVT::i32))
+    Op = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op);
+
+  return Op;
 }
 
 static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
diff --git a/test/CodeGen/ARM/vget_lane2.ll b/test/CodeGen/ARM/vget_lane2.ll
new file mode 100644
index 00000000000..e8aa82a9666
--- /dev/null
+++ b/test/CodeGen/ARM/vget_lane2.ll
@@ -0,0 +1,63 @@
+; RUN: llvm-as < %s | llc -mattr=+neon | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
+target triple = "thumbv7-elf"
+
+define arm_aapcs_vfpcc void @test_vget_laneu16() nounwind {
+entry:
+; CHECK: vmov.u16 r0, d0[1]
+  %arg0_uint16x4_t = alloca <4 x i16>             ; <<4 x i16>*> [#uses=1]
+  %out_uint16_t = alloca i16                      ; <i16*> [#uses=1]
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  %0 = load <4 x i16>* %arg0_uint16x4_t, align 8  ; <<4 x i16>> [#uses=1]
+  %1 = extractelement <4 x i16> %0, i32 1         ; <i16> [#uses=1]
+  store i16 %1, i16* %out_uint16_t, align 2
+  br label %return
+
+return:                                           ; preds = %entry
+  ret void
+}
+
+define arm_aapcs_vfpcc void @test_vget_laneu8() nounwind {
+entry:
+; CHECK: vmov.u8 r0, d0[1]
+  %arg0_uint8x8_t = alloca <8 x i8>               ; <<8 x i8>*> [#uses=1]
+  %out_uint8_t = alloca i8                        ; <i8*> [#uses=1]
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  %0 = load <8 x i8>* %arg0_uint8x8_t, align 8    ; <<8 x i8>> [#uses=1]
+  %1 = extractelement <8 x i8> %0, i32 1          ; <i8> [#uses=1]
+  store i8 %1, i8* %out_uint8_t, align 1
+  br label %return
+
+return:                                           ; preds = %entry
+  ret void
+}
+
+define arm_aapcs_vfpcc void @test_vgetQ_laneu16() nounwind {
+entry:
+; CHECK: vmov.u16 r0, d0[1]
+  %arg0_uint16x8_t = alloca <8 x i16>             ; <<8 x i16>*> [#uses=1]
+  %out_uint16_t = alloca i16                      ; <i16*> [#uses=1]
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  %0 = load <8 x i16>* %arg0_uint16x8_t, align 16 ; <<8 x i16>> [#uses=1]
+  %1 = extractelement <8 x i16> %0, i32 1         ; <i16> [#uses=1]
+  store i16 %1, i16* %out_uint16_t, align 2
+  br label %return
+
+return:                                           ; preds = %entry
+  ret void
+}
+
+define arm_aapcs_vfpcc void @test_vgetQ_laneu8() nounwind {
+entry:
+; CHECK: vmov.u8 r0, d0[1]
+  %arg0_uint8x16_t = alloca <16 x i8>             ; <<16 x i8>*> [#uses=1]
+  %out_uint8_t = alloca i8                        ; <i8*> [#uses=1]
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  %0 = load <16 x i8>* %arg0_uint8x16_t, align 16 ; <<16 x i8>> [#uses=1]
+  %1 = extractelement <16 x i8> %0, i32 1         ; <i8> [#uses=1]
+  store i8 %1, i8* %out_uint8_t, align 1
+  br label %return
+
+return:                                           ; preds = %entry
+  ret void
+}