mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-14 11:32:34 +00:00
Fix patterns for SSE4.1 move and sign extend instructions. Also add instructions which fold VZEXT_MOVL and VZEXT_LOAD.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@56594 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
8a186ae4aa
commit
ca57f78332
@ -243,6 +243,16 @@ def loadi16 : PatFrag<(ops node:$ptr), (i16 (ld node:$ptr)), [{
|
||||
return false;
|
||||
}]>;
|
||||
|
||||
// loadi16_anyext - Pattern fragment over an i32-typed load node. The predicate
// accepts the load only when it is unindexed, any-extending (ISD::EXTLOAD),
// at least 2-byte aligned and not volatile; every other load is rejected.
def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (ld node:$ptr)), [{
  LoadSDNode *Load = cast<LoadSDNode>(N);
  // All conditions are side-effect free, so the chain of checks collapses
  // into one short-circuit expression evaluated in the same order.
  return Load->getAddressingMode() == ISD::UNINDEXED &&
         Load->getExtensionType() == ISD::EXTLOAD &&
         Load->getAlignment() >= 2 &&
         !Load->isVolatile();
}]>;
|
||||
|
||||
def loadi32 : PatFrag<(ops node:$ptr), (i32 (ld node:$ptr)), [{
|
||||
LoadSDNode *LD = cast<LoadSDNode>(N);
|
||||
if (LD->getAddressingMode() != ISD::UNINDEXED)
|
||||
|
@ -162,6 +162,17 @@ def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>;
|
||||
def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
|
||||
def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
|
||||
|
||||
// vzmovl_v2i64 - Matches a bitconvert of an X86vzmovl node whose operand is a
// v2i64 built by scalar_to_vector from an i64 load (loadi64) of $src.
// NOTE(review): X86vzmovl is presumably the VZEXT_MOVL node named in the
// commit message - confirm against its SDNode definition.
def vzmovl_v2i64 : PatFrag<(ops node:$src),
|
||||
(bitconvert (v2i64 (X86vzmovl
|
||||
(v2i64 (scalar_to_vector (loadi64 node:$src))))))>;
|
||||
// vzmovl_v4i32 - Matches a bitconvert of an X86vzmovl node whose operand is a
// v4i32 built by scalar_to_vector from an i32 load (loadi32) of $src.
// NOTE(review): X86vzmovl is presumably the VZEXT_MOVL node named in the
// commit message - confirm against its SDNode definition.
def vzmovl_v4i32 : PatFrag<(ops node:$src),
|
||||
(bitconvert (v4i32 (X86vzmovl
|
||||
(v4i32 (scalar_to_vector (loadi32 node:$src))))))>;
|
||||
|
||||
// vzload_v2i64 - Matches a bitconvert of a v2i64 X86vzload node taking $src
// as its operand.
// NOTE(review): X86vzload is presumably the VZEXT_LOAD node named in the
// commit message - confirm against its SDNode definition.
def vzload_v2i64 : PatFrag<(ops node:$src),
|
||||
(bitconvert (v2i64 (X86vzload node:$src)))>;
|
||||
|
||||
|
||||
// fp32imm0 - Pattern leaf matching an f32 floating-point immediate whose value
// is exactly +0.0, as decided by isExactlyValue.
def fp32imm0 : PatLeaf<(f32 fpimm), [{
|
||||
return N->isExactlyValue(+0.0);
|
||||
}]>;
|
||||
@ -3368,8 +3379,9 @@ multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
|
||||
|
||||
def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst,
|
||||
(IntId (bitconvert (v4i32 (load addr:$src)))))]>, OpSize;
|
||||
[(set VR128:$dst,
|
||||
(IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>,
|
||||
OpSize;
|
||||
}
|
||||
|
||||
defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>;
|
||||
@ -3379,6 +3391,38 @@ defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw>;
|
||||
defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd>;
|
||||
defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq>;
|
||||
|
||||
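// Common patterns involving a scalar load: when the intrinsic operand is a
// 64-bit scalar load wrapped in X86vzmovl (vzmovl_v2i64) or an X86vzload
// (vzload_v2i64), select the memory (rm) form of the instruction directly.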
|
||||
def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),
|
||||
(PMOVSXBWrm addr:$src)>, Requires<[HasSSE41]>;
|
||||
def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
|
||||
(PMOVSXBWrm addr:$src)>, Requires<[HasSSE41]>;
|
||||
|
||||
def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
|
||||
(PMOVSXWDrm addr:$src)>, Requires<[HasSSE41]>;
|
||||
def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
|
||||
(PMOVSXWDrm addr:$src)>, Requires<[HasSSE41]>;
|
||||
|
||||
def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
|
||||
(PMOVSXDQrm addr:$src)>, Requires<[HasSSE41]>;
|
||||
def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
|
||||
(PMOVSXDQrm addr:$src)>, Requires<[HasSSE41]>;
|
||||
|
||||
def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
|
||||
(PMOVZXBWrm addr:$src)>, Requires<[HasSSE41]>;
|
||||
def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
|
||||
(PMOVZXBWrm addr:$src)>, Requires<[HasSSE41]>;
|
||||
|
||||
def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
|
||||
(PMOVZXWDrm addr:$src)>, Requires<[HasSSE41]>;
|
||||
def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
|
||||
(PMOVZXWDrm addr:$src)>, Requires<[HasSSE41]>;
|
||||
|
||||
def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
|
||||
(PMOVZXDQrm addr:$src)>, Requires<[HasSSE41]>;
|
||||
def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
|
||||
(PMOVZXDQrm addr:$src)>, Requires<[HasSSE41]>;
|
||||
|
||||
|
||||
multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
|
||||
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||
@ -3386,8 +3430,9 @@ multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
|
||||
|
||||
def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst,
|
||||
(IntId (bitconvert (v4i32 (load addr:$src)))))]>, OpSize;
|
||||
[(set VR128:$dst,
|
||||
(IntId (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>,
|
||||
OpSize;
|
||||
}
|
||||
|
||||
defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>;
|
||||
@ -3395,20 +3440,45 @@ defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>;
|
||||
defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>;
|
||||
defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq>;
|
||||
|
||||
// Common patterns involving a scalar load: when the intrinsic operand is a
// 32-bit scalar load wrapped in X86vzmovl (vzmovl_v4i32), select the memory
// (rm) form of the instruction directly instead of a separate load.
// Each pattern is guarded by HasSSE41, matching the other PMOVSX/PMOVZX
// load-folding patterns in this file, so it cannot be selected on
// subtargets without SSE4.1.
def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),
          (PMOVSXBDrm addr:$src)>, Requires<[HasSSE41]>;
def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)),
          (PMOVSXWQrm addr:$src)>, Requires<[HasSSE41]>;

def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)),
          (PMOVZXBDrm addr:$src)>, Requires<[HasSSE41]>;
def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)),
          (PMOVZXWQrm addr:$src)>, Requires<[HasSSE41]>;
|
||||
|
||||
|
||||
multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
|
||||
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
|
||||
|
||||
// Expecting an i16 load any-extended to an i32 value.
|
||||
def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i16mem:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst,
|
||||
(IntId (bitconvert (v4i32 (load addr:$src)))))]>, OpSize;
|
||||
[(set VR128:$dst, (IntId (bitconvert
|
||||
(v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))]>,
|
||||
OpSize;
|
||||
}
|
||||
|
||||
defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
|
||||
// PMOVZXBQ - zero-extending byte -> quadword move (opcode 0x32, intrinsic
// int_x86_sse41_pmovzxbq). The assembly string must be "pmovzxbq"; it was
// misspelled "pmovsxbq", which made the printed assembly name the
// sign-extending instruction (opcode 0x22) instead.
// NOTE(review): any regression test that greps the output of the pmovzxbq
// intrinsic for "pmovsxbq" encodes the old typo and must be updated too.
defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
|
||||
|
||||
// Common patterns involving a scalar load: when the intrinsic operand is a
// 32-bit scalar load wrapped in X86vzmovl, select the memory (rm) form of
// the instruction directly. The operand DAG is exactly the vzmovl_v4i32
// fragment, so the fragment is used instead of spelling the DAG out again.
// Each pattern is guarded by HasSSE41, matching the other PMOVSX/PMOVZX
// load-folding patterns in this file.
def : Pat<(int_x86_sse41_pmovsxbq (vzmovl_v4i32 addr:$src)),
          (PMOVSXBQrm addr:$src)>, Requires<[HasSSE41]>;

def : Pat<(int_x86_sse41_pmovzxbq (vzmovl_v4i32 addr:$src)),
          (PMOVZXBQrm addr:$src)>, Requires<[HasSSE41]>;
|
||||
|
||||
|
||||
/// SS41I_binop_ext8 - SSE 4.1 extract 8 bits to 32 bit reg or 8 bit mem
|
||||
multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
|
||||
|
47
test/CodeGen/X86/sse41-pmovx.ll
Normal file
47
test/CodeGen/X86/sse41-pmovx.ll
Normal file
@ -0,0 +1,47 @@
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 | not grep movd
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 | not grep movq
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 | grep pmovsxbd
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 | grep pmovsxwd
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 | grep pmovsxbq
|
||||
; RUN: llvm-as < %s | llc -march=x86-64 -mattr=sse41 | grep movq | count 1
|
||||
|
||||
; t1: loads an i32 from %p, places it in lane 0 of a <4 x i32> whose remaining
; lanes are set to 0, bitcasts the vector to <16 x i8> and passes it to
; llvm.x86.sse41.pmovsxbd. The <4 x i32> result is returned as <2 x i64>.
define <2 x i64> @t1(i32* %p) nounwind {
|
||||
entry:
|
||||
%0 = load i32* %p, align 4 ; <i32> [#uses=1]
|
||||
%1 = insertelement <4 x i32> undef, i32 %0, i32 0 ; <<4 x i32>> [#uses=1]
|
||||
%2 = insertelement <4 x i32> %1, i32 0, i32 1 ; <<4 x i32>> [#uses=1]
|
||||
%3 = insertelement <4 x i32> %2, i32 0, i32 2 ; <<4 x i32>> [#uses=1]
|
||||
%4 = insertelement <4 x i32> %3, i32 0, i32 3 ; <<4 x i32>> [#uses=1]
|
||||
%5 = bitcast <4 x i32> %4 to <16 x i8> ; <<16 x i8>> [#uses=1]
|
||||
%6 = tail call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %5) nounwind readnone ; <<4 x i32>> [#uses=1]
|
||||
%7 = bitcast <4 x i32> %6 to <2 x i64> ; <<2 x i64>> [#uses=1]
|
||||
ret <2 x i64> %7
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
|
||||
|
||||
; t2: loads an i64 from %p, inserts it into lane 0 of a zeroinitializer
; <2 x i64>, bitcasts the vector to <8 x i16> and passes it to
; llvm.x86.sse41.pmovsxwd. The <4 x i32> result is returned as <2 x i64>.
define <2 x i64> @t2(i64* %p) nounwind readonly {
|
||||
entry:
|
||||
%0 = load i64* %p ; <i64> [#uses=1]
|
||||
%tmp2 = insertelement <2 x i64> zeroinitializer, i64 %0, i32 0 ; <<2 x i64>> [#uses=1]
|
||||
%1 = bitcast <2 x i64> %tmp2 to <8 x i16> ; <<8 x i16>> [#uses=1]
|
||||
%2 = tail call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %1) nounwind readnone ; <<4 x i32>> [#uses=1]
|
||||
%3 = bitcast <4 x i32> %2 to <2 x i64> ; <<2 x i64>> [#uses=1]
|
||||
ret <2 x i64> %3
|
||||
}
|
||||
|
||||
declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
|
||||
|
||||
|
||||
@gv = external global i16 ; <i16*> [#uses=1]
|
||||
|
||||
; t3: loads an i16 from the global @gv (align 2), inserts it into lane 0 of an
; otherwise undef <8 x i16>, bitcasts the vector to <16 x i8> and passes it to
; llvm.x86.sse41.pmovzxbq, returning the <2 x i64> result.
; NOTE(review): this is the only function in the file that uses a *bq
; intrinsic, and it is the zero-extending one (pmovzxbq), yet the RUN lines
; grep for "pmovsxbq" - confirm which mnemonic the test is meant to expect.
define <2 x i64> @t3() nounwind {
|
||||
entry:
|
||||
%0 = load i16* @gv, align 2 ; <i16> [#uses=1]
|
||||
%1 = insertelement <8 x i16> undef, i16 %0, i32 0 ; <<8 x i16>> [#uses=1]
|
||||
%2 = bitcast <8 x i16> %1 to <16 x i8> ; <<16 x i8>> [#uses=1]
|
||||
%3 = tail call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %2) nounwind readnone ; <<2 x i64>> [#uses=1]
|
||||
ret <2 x i64> %3
|
||||
}
|
||||
|
||||
declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
|
Loading…
Reference in New Issue
Block a user