From 531f025361555e7a695eb559ec02645c054ee146 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Wed, 23 Oct 2013 18:32:43 +0000 Subject: [PATCH] Fix PR17631 - Skip instructions added in prolog. For specific targets, the prolog may insert helper function calls (e.g. _chkstk is called when more than 4K bytes are allocated on the stack). However, these helpers don't use/def YMM/XMM registers. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193261 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86VZeroUpper.cpp | 11 ++++++++++- test/CodeGen/X86/pr17631.ll | 22 ++++++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 test/CodeGen/X86/pr17631.ll diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp index 477f75afef2..0d37a7d0e67 100644 --- a/lib/Target/X86/X86VZeroUpper.cpp +++ b/lib/Target/X86/X86VZeroUpper.cpp @@ -231,8 +231,17 @@ bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF, bool BBHasCall = false; for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) { - MachineInstr *MI = I; DebugLoc dl = I->getDebugLoc(); + MachineInstr *MI = I; + + // Don't need to check instructions added in prolog. + // In prolog, special function calls may be added for specific targets + // (e.g. on Windows, a prolog helper '_chkstk' is called when the local + // variables exceed 4K bytes on stack.) These helpers won't use/def YMM/XMM + // registers. + if (MI->getFlag(MachineInstr::FrameSetup)) + continue; + bool isControlFlow = MI->isCall() || MI->isReturn(); // Shortcut: don't need to check regular instructions in dirty state. 
diff --git a/test/CodeGen/X86/pr17631.ll b/test/CodeGen/X86/pr17631.ll new file mode 100644 index 00000000000..a572ff2e3b3 --- /dev/null +++ b/test/CodeGen/X86/pr17631.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s -mcpu=core-avx-i -mtriple=i386-pc-win32 | FileCheck %s + +%struct_type = type { [64 x <8 x float>], <8 x float> } + +; Function Attrs: nounwind readnone +declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) + +; Function Attrs: nounwind +define i32 @equal(<8 x i32> %A) { +allocas: + %first_alloc = alloca [64 x <8 x i32>] + %second_alloc = alloca %struct_type + + %A1 = bitcast <8 x i32> %A to <8 x float> + %A2 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %A1) + ret i32 %A2 +} + +; CHECK: equal +; CHECK-NOT: vzeroupper +; CHECK: _chkstk +; CHECK: ret