diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 901e5b9a2c4..4fbd5b1ee59 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6664,7 +6664,17 @@ EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
        (allowsMisalignedMemoryAccesses(MVT::f128, 0, 1, &Fast) && Fast)))
     return MVT::f128;
 
-  return Size >= 8 ? MVT::i64 : MVT::i32;
+  if (Size >= 8 &&
+      (memOpAlign(SrcAlign, DstAlign, 8) ||
+       (allowsMisalignedMemoryAccesses(MVT::i64, 0, 1, &Fast) && Fast)))
+    return MVT::i64;
+
+  if (Size >= 4 &&
+      (memOpAlign(SrcAlign, DstAlign, 4) ||
+       (allowsMisalignedMemoryAccesses(MVT::i32, 0, 1, &Fast) && Fast)))
+    return MVT::i32;
+
+  return MVT::Other;
 }
 
 // 12-bit optionally shifted immediates are legal for adds.
diff --git a/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll b/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll
new file mode 100644
index 00000000000..e71b824e097
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll
@@ -0,0 +1,14 @@
+; RUN: llc -march=arm64 -aarch64-strict-align < %s | FileCheck %s
+
+; Small (16 bytes here) unaligned memcpys should stay memcpy calls if
+; strict-alignment is turned on.
+define void @t0(i8* %out, i8* %in) {
+; CHECK-LABEL: t0:
+; CHECK: orr w2, wzr, #0x10
+; CHECK-NEXT: bl _memcpy
+entry:
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 16, i32 1, i1 false)
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
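
For readers skimming the first hunk, below is a minimal standalone sketch of the alignment-gated type selection the new getOptimalMemOpType body implements. It is an illustration only: CopyChunk, alignedTo, fastMisaligned, strictAlign, and pickChunk are names invented for this sketch, not LLVM identifiers; the real code consults memOpAlign and allowsMisalignedMemoryAccesses, and an MVT::Other result lets the generic memcpy lowering either use narrower accesses or, as the new test checks, keep the out-of-line memcpy call.

// Standalone sketch of the patched decision logic; all names here are
// invented for illustration and are not LLVM API.
#include <cstdint>
#include <cstdio>

enum class CopyChunk { F128, I64, I32, Other };

// Are both pointers aligned to `need` bytes? (An alignment of 0, i.e.
// "unknown", is treated as insufficient in this sketch.)
static bool alignedTo(unsigned srcAlign, unsigned dstAlign, unsigned need) {
  return srcAlign >= need && dstAlign >= need;
}

// Would a misaligned access of this width be legal and fast? Under a
// strict-alignment target setting this is never the case.
static bool fastMisaligned(unsigned /*bytes*/, bool strictAlign) {
  return !strictAlign;
}

static CopyChunk pickChunk(uint64_t size, unsigned srcAlign, unsigned dstAlign,
                           bool strictAlign) {
  if (size >= 16 && (alignedTo(srcAlign, dstAlign, 16) ||
                     fastMisaligned(16, strictAlign)))
    return CopyChunk::F128;
  if (size >= 8 && (alignedTo(srcAlign, dstAlign, 8) ||
                    fastMisaligned(8, strictAlign)))
    return CopyChunk::I64;
  if (size >= 4 && (alignedTo(srcAlign, dstAlign, 4) ||
                    fastMisaligned(4, strictAlign)))
    return CopyChunk::I32;
  // No sufficiently aligned (or fast-misaligned) wide type: defer to the
  // generic lowering, which may end up keeping the memcpy call.
  return CopyChunk::Other;
}

int main() {
  // A 16-byte copy with 1-byte-aligned pointers: wide chunks are only picked
  // when misaligned accesses are allowed.
  std::printf("lenient: %d\n", static_cast<int>(pickChunk(16, 1, 1, false))); // F128
  std::printf("strict:  %d\n", static_cast<int>(pickChunk(16, 1, 1, true)));  // Other
  return 0;
}

Under the sketch's assumptions, the strict-alignment case yields Other for a 16-byte, 1-aligned copy, which mirrors why the new .ll test expects the copy to remain a bl _memcpy rather than be expanded inline.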