mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-27 14:34:58 +00:00
Add support for the form of the SSE41 extractps instruction that
puts its result in a 32-bit GPR.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@49762 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
bcda285fcc
commit
171c11ec93
@ -3833,11 +3833,13 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDOperand Op,
|
||||
} else if (VT == MVT::f32) {
|
||||
// EXTRACTPS outputs to a GPR32 register which will require a movd to copy
|
||||
// the result back to FR32 register. It's only worth matching if the
|
||||
// result has a single use which is a store.
|
||||
// result has a single use which is a store or a bitcast to i32.
|
||||
if (!Op.hasOneUse())
|
||||
return SDOperand();
|
||||
SDNode *User = Op.Val->use_begin()->getUser();
|
||||
if (User->getOpcode() != ISD::STORE)
|
||||
if (User->getOpcode() != ISD::STORE &&
|
||||
(User->getOpcode() != ISD::BIT_CONVERT ||
|
||||
User->getValueType(0) != MVT::i32))
|
||||
return SDOperand();
|
||||
SDOperand Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32,
|
||||
DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, Op.getOperand(0)),
|
||||
|
@ -3387,13 +3387,12 @@ defm PEXTRD : SS41I_extract32<0x16, "pextrd">;
|
||||
/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
|
||||
/// destination
|
||||
multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
|
||||
// Not worth matching to rr form of extractps since the result is in GPR32.
|
||||
def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
|
||||
(ins VR128:$src1, i32i8imm:$src2),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[/*(set GR32:$dst,
|
||||
(extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))*/]>,
|
||||
[(set GR32:$dst,
|
||||
(extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
|
||||
OpSize;
|
||||
def mr : SS4AIi8<opc, MRMDestMem, (outs),
|
||||
(ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
|
||||
|
12
test/CodeGen/X86/sse41-extractps-bitcast-0.ll
Normal file
12
test/CodeGen/X86/sse41-extractps-bitcast-0.ll
Normal file
@ -0,0 +1,12 @@
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 | grep extractps | count 2
|
||||
|
||||
; @foo - extract element 3 of a <4 x float> and return its bit pattern as an
; i32 (float -> i32 bitcast, no numeric conversion).
; The RUN line of this file expects two "extractps" lines in llc's output,
; presumably one for each of @foo and @boo.
define i32 @foo(<4 x float> %v) {
|
||||
%s = extractelement <4 x float> %v, i32 3
|
||||
; The extracted float's only use is this bitcast to i32.
%i = bitcast float %s to i32
|
||||
ret i32 %i
|
||||
}
|
||||
; @boo - the same result as extracting a float and bitcasting it, but written
; the other way round: the whole vector is bitcast to <4 x i32> first and
; element 3 is then extracted as an i32.
; The RUN line of this file expects two "extractps" lines in llc's output,
; presumably one for each of @foo and @boo.
define i32 @boo(<4 x float> %v) {
|
||||
%t = bitcast <4 x float> %v to <4 x i32>
|
||||
%s = extractelement <4 x i32> %t, i32 3
|
||||
ret i32 %s
|
||||
}
|
19
test/CodeGen/X86/sse41-extractps-bitcast-1.ll
Normal file
19
test/CodeGen/X86/sse41-extractps-bitcast-1.ll
Normal file
@ -0,0 +1,19 @@
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 | not grep extractps
|
||||
|
||||
; The non-store form of extractps puts its result into a GPR.
|
||||
; This makes it suitable for an extract from a <4 x float> that
|
||||
; is bitcast to i32, but unsuitable for much of anything else.
|
||||
|
||||
; @bar - extract element 3 of a <4 x float>, add 1.0 to it and return the sum.
; The extracted value is consumed as a float (by the add), not stored or
; bitcast to i32; the RUN line of this file requires that no "extractps"
; appears in llc's output.
define float @bar(<4 x float> %v) {
|
||||
%s = extractelement <4 x float> %v, i32 3
|
||||
%t = add float %s, 1.0
|
||||
ret float %t
|
||||
}
|
||||
; @baz - extract element 3 of a <4 x float> and return it directly as a float.
; The value is never bitcast to i32; the RUN line of this file requires that
; no "extractps" appears in llc's output.
define float @baz(<4 x float> %v) {
|
||||
%s = extractelement <4 x float> %v, i32 3
|
||||
ret float %s
|
||||
}
|
||||
; @qux - extract element 3 of a <4 x i32> and return it.
; The source vector is an integer vector, with no float vector involved; the
; RUN line of this file requires that no "extractps" appears in llc's output.
define i32 @qux(<4 x i32> %v) {
|
||||
%i = extractelement <4 x i32> %v, i32 3
|
||||
ret i32 %i
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user