High bits of movmskp{s|d} and pmovmskb are known zero. rdar://10247336

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@141371 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2011-10-07 17:21:44 +00:00
parent 6d2f9cec71
commit 7c1780c5fe
2 changed files with 58 additions and 0 deletions

View File

@ -12333,6 +12333,33 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
KnownZero |= APInt::getHighBitsSet(Mask.getBitWidth(), KnownZero |= APInt::getHighBitsSet(Mask.getBitWidth(),
Mask.getBitWidth() - 1); Mask.getBitWidth() - 1);
break; break;
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntId = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
unsigned NumLoBits = 0;
switch (IntId) {
default: break;
case Intrinsic::x86_sse_movmsk_ps:
case Intrinsic::x86_avx_movmsk_ps_256:
case Intrinsic::x86_sse2_movmsk_pd:
case Intrinsic::x86_avx_movmsk_pd_256:
case Intrinsic::x86_mmx_pmovmskb:
case Intrinsic::x86_sse2_pmovmskb_128: {
// High bits of movmskp{s|d}, pmovmskb are known zero.
switch (IntId) {
case Intrinsic::x86_sse_movmsk_ps: NumLoBits = 4; break;
case Intrinsic::x86_avx_movmsk_ps_256: NumLoBits = 8; break;
case Intrinsic::x86_sse2_movmsk_pd: NumLoBits = 2; break;
case Intrinsic::x86_avx_movmsk_pd_256: NumLoBits = 4; break;
case Intrinsic::x86_mmx_pmovmskb: NumLoBits = 8; break;
case Intrinsic::x86_sse2_pmovmskb_128: NumLoBits = 16; break;
}
KnownZero = APInt::getHighBitsSet(Mask.getBitWidth(),
Mask.getBitWidth() - NumLoBits);
break;
}
}
break;
}
} }
} }

View File

@ -77,3 +77,34 @@ entry:
%shr.i = lshr i32 %2, 31 %shr.i = lshr i32 %2, 31
ret i32 %shr.i ret i32 %shr.i
} }
; rdar://10247336
; movmskp{s|d} only sets the low 4/2 bits; the high bits are known zero.
; Regression test: the i32 result of llvm.x86.sse.movmsk.ps is sign-extended
; to i64 and used as an index into %indexTable; the loaded element is returned.
define i32 @t1(<4 x float> %x, i32* nocapture %indexTable) nounwind uwtable readonly ssp {
entry:
; The directives below expect a movmskps in the generated code and no movslq
; following it (presumably the sext needs no explicit sign-extending move once
; the intrinsic's high bits are known zero -- confirm against the backend change).
; CHECK: t1:
; CHECK: movmskps
; CHECK-NOT: movslq
%0 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %x) nounwind
; Sign-extend the mask so it can be used as a 64-bit array index.
%idxprom = sext i32 %0 to i64
%arrayidx = getelementptr inbounds i32* %indexTable, i64 %idxprom
%1 = load i32* %arrayidx, align 4
ret i32 %1
}
; Regression test: %x is bitcast to <2 x double>, passed to
; llvm.x86.sse2.movmsk.pd, and the i32 result is sign-extended to i64 and used
; as an index into %indexTable; the loaded element is returned.
define i32 @t2(<4 x float> %x, i32* nocapture %indexTable) nounwind uwtable readonly ssp {
entry:
; The directives below expect a movmskpd in the generated code and no movslq
; following it (presumably the sext needs no explicit sign-extending move once
; the intrinsic's high bits are known zero -- confirm against the backend change).
; CHECK: t2:
; CHECK: movmskpd
; CHECK-NOT: movslq
; Reinterpret the four floats as two doubles to feed the pd variant.
%0 = bitcast <4 x float> %x to <2 x double>
%1 = tail call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %0) nounwind
; Sign-extend the mask so it can be used as a 64-bit array index.
%idxprom = sext i32 %1 to i64
%arrayidx = getelementptr inbounds i32* %indexTable, i64 %idxprom
%2 = load i32* %arrayidx, align 4
ret i32 %2
}
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone