diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index ed078ec1c51..d53f3291e71 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -702,6 +702,32 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } break; + case Intrinsic::arm_neon_vld1: + case Intrinsic::arm_neon_vld2: + case Intrinsic::arm_neon_vld3: + case Intrinsic::arm_neon_vld4: + case Intrinsic::arm_neon_vld2lane: + case Intrinsic::arm_neon_vld3lane: + case Intrinsic::arm_neon_vld4lane: + case Intrinsic::arm_neon_vst1: + case Intrinsic::arm_neon_vst2: + case Intrinsic::arm_neon_vst3: + case Intrinsic::arm_neon_vst4: + case Intrinsic::arm_neon_vst2lane: + case Intrinsic::arm_neon_vst3lane: + case Intrinsic::arm_neon_vst4lane: { + unsigned MemAlign = GetOrEnforceKnownAlignment(II->getArgOperand(0)); + unsigned AlignArg = II->getNumArgOperands() - 1; + ConstantInt *IntrAlign = dyn_cast(II->getArgOperand(AlignArg)); + if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) { + II->setArgOperand(AlignArg, + ConstantInt::get(Type::getInt32Ty(II->getContext()), + MemAlign, false)); + return II; + } + break; + } + case Intrinsic::stackrestore: { // If the save is right next to the restore, remove the restore. This can // happen when variable allocas are DCE'd. diff --git a/test/Transforms/InstCombine/neon-intrinsics.ll b/test/Transforms/InstCombine/neon-intrinsics.ll new file mode 100644 index 00000000000..3ad09cc6c69 --- /dev/null +++ b/test/Transforms/InstCombine/neon-intrinsics.ll @@ -0,0 +1,25 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +; The alignment arguments for NEON load/store intrinsics can be increased +; by instcombine. Check for this. + +; CHECK: vld4.v2i32({{.*}}, i32 32) +; CHECK: vst4.v2i32({{.*}}, i32 16) + +@x = common global [8 x i32] zeroinitializer, align 32 +@y = common global [8 x i32] zeroinitializer, align 16 + +%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } + +define void @test() nounwind ssp { + %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8* bitcast ([8 x i32]* @x to i8*), i32 1) + %tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 1 + %tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 2 + %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 3 + call void @llvm.arm.neon.vst4.v2i32(i8* bitcast ([8 x i32]* @y to i8*), <2 x i32> %tmp2, <2 x i32> %tmp3, <2 x i32> %tmp4, <2 x i32> %tmp5, i32 1) + ret void +} + +declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8*, i32) nounwind readonly +declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind