mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-08 21:32:39 +00:00
Prevent insertion of "vzeroupper" before a call that preserves YMM registers, since the caller may keep values in those preserved registers live across the call.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175043 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
cef6cfe4a6
commit
d29804f80d
@ -120,9 +120,19 @@ static bool checkFnHasLiveInYmm(MachineRegisterInfo &MRI) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool clobbersAllYmmRegs(const MachineOperand &MO) {
|
||||
for (unsigned reg = X86::YMM0; reg < X86::YMM15; ++reg) {
|
||||
if (!MO.clobbersPhysReg(reg))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool hasYmmReg(MachineInstr *MI) {
|
||||
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
|
||||
const MachineOperand &MO = MI->getOperand(i);
|
||||
if (MI->isCall() && MO.isRegMask() && !clobbersAllYmmRegs(MO))
|
||||
return true;
|
||||
if (!MO.isReg())
|
||||
continue;
|
||||
if (MO.isDebug())
|
||||
|
@ -127,3 +127,43 @@ define i32 @test_int(i32 %a, i32 %b) nounwind {
|
||||
%c = add i32 %c2, %b
|
||||
ret i32 %c
|
||||
}
|
||||
|
||||
; WIN64: test_float4
|
||||
; WIN64-NOT: vzeroupper
|
||||
; WIN64: call
|
||||
; WIN64-NOT: vzeroupper
|
||||
; WIN64: call
|
||||
; WIN64: ret
|
||||
|
||||
; X64: test_float4
|
||||
; X64-NOT: vzeroupper
|
||||
; X64: call
|
||||
; X64-NOT: vzeroupper
|
||||
; X64: call
|
||||
; X64: ret
|
||||
|
||||
; X32: test_float4
|
||||
; X32: vzeroupper
|
||||
; X32: call
|
||||
; X32: vzeroupper
|
||||
; X32: call
|
||||
; X32: ret
|
||||
|
||||
declare <4 x float> @func_float4(<4 x float>, <4 x float>, <4 x float>)
|
||||
|
||||
; Splits each <8 x float> argument into its low and high <4 x float> halves,
; calls @func_float4 with the intel_ocl_bicc calling convention once per half,
; and joins the two <4 x float> results into the returned <8 x float>.
define <8 x float> @test_float4(<8 x float> %a, <8 x float> %b, <8 x float> %c) nounwind readnone {
|
||||
entry:
|
||||
; Low halves (elements 0-3) of %a, %b and %c.
%0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%1 = shufflevector <8 x float> %b, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%2 = shufflevector <8 x float> %c, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
; First call: operates on the low halves.
%call.i = tail call intel_ocl_bicc <4 x float> @func_float4(<4 x float> %0, <4 x float> %1, <4 x float> %2) nounwind
|
||||
; Widen the first result to <8 x float>; the upper four lanes are undef.
%3 = shufflevector <4 x float> %call.i, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; High halves (elements 4-7) of %a, %b and %c.
%4 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
%5 = shufflevector <8 x float> %b, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
%6 = shufflevector <8 x float> %c, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
; Second call: operates on the high halves.
%call.i2 = tail call intel_ocl_bicc <4 x float> @func_float4(<4 x float> %4, <4 x float> %5, <4 x float> %6) nounwind
|
||||
; Widen the second result to <8 x float>; the upper four lanes are undef.
%7 = shufflevector <4 x float> %call.i2, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; Result lanes 0-3 come from %3 (indices 0-3) and lanes 4-7 from the low lanes
; of %7 (indices 8-11 select elements 0-3 of the second operand).
%8 = shufflevector <8 x float> %3, <8 x float> %7, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
|
||||
ret <8 x float> %8
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user