diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 6a073dbd442..5ad2036d512 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -514,6 +514,11 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
                 > VT1.getVectorElementType().getSizeInBits())
           setTruncStoreAction(VT, VT1, Expand);
       }
+
+      setOperationAction(ISD::MULHS, VT, Expand);
+      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+      setOperationAction(ISD::MULHU, VT, Expand);
+      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
     }
 
     // There is no v1i64/v2i64 multiply, expand v1i64/v2i64 to GPR i64 multiply.
diff --git a/test/CodeGen/AArch64/neon-idiv.ll b/test/CodeGen/AArch64/neon-idiv.ll
new file mode 100644
index 00000000000..9c9758a81f8
--- /dev/null
+++ b/test/CodeGen/AArch64/neon-idiv.ll
@@ -0,0 +1,14 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -mattr=+neon | FileCheck %s
+; RUN: llc -mtriple=arm64-none-linux-gnu < %s -mattr=+neon | FileCheck %s
+
+define <4 x i32> @test1(<4 x i32> %a) {
+  %rem = srem <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
+  ret <4 x i32> %rem
+; CHECK-LABEL: test1
+; FIXME: Can we lower this more efficiently?
+; CHECK: mul
+; CHECK: mul
+; CHECK: mul
+; CHECK: mul
+}
+