mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-14 11:32:34 +00:00
Because we promote SSE logical ops and loads to v2i64, we often end up generating
code that crosses integer / floating point domains (e.g. generating pxor / pand for logical ops on floating point values, or movdqa to load / store floating point SSE values). Given that, it's better to use movaps instead of movdqa and movups instead of movdqu. They have the same latency, but the "aps" variants are one byte shorter. If the domain crossing problem turns out to be a real performance issue, then we will have to fix it with dynamic programming based isel. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@40076 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
102dc195b6
commit
b4162fd393
@ -1632,17 +1632,17 @@ def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
"movdqa {$src, $dst|$dst, $src}", []>;
|
||||
def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
|
||||
"movdqa {$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst, (alignedloadv2i64 addr:$src))]>;
|
||||
[/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>;
|
||||
def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
|
||||
"movdqa {$src, $dst|$dst, $src}",
|
||||
[(alignedstore (v2i64 VR128:$src), addr:$dst)]>;
|
||||
[/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>;
|
||||
def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
|
||||
"movdqu {$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst, (loadv2i64 addr:$src))]>,
|
||||
[/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
|
||||
XS, Requires<[HasSSE2]>;
|
||||
def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
|
||||
"movdqu {$src, $dst|$dst, $src}",
|
||||
[(store (v2i64 VR128:$src), addr:$dst)]>,
|
||||
[/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
|
||||
XS, Requires<[HasSSE2]>;
|
||||
|
||||
// Intrinsic forms of MOVDQU load and store
|
||||
@ -2375,13 +2375,6 @@ def : Pat<(v4i32 immAllOnesV), (V_SETALLONES)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(v2i64 immAllOnesV), (V_SETALLONES)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(v4f32 immAllOnesV), (V_SETALLONES)>, Requires<[HasSSE1]>;
|
||||
|
||||
// Store 128-bit integer vector values.
|
||||
def : Pat<(store (v16i8 VR128:$src), addr:$dst),
|
||||
(MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(store (v8i16 VR128:$src), addr:$dst),
|
||||
(MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(store (v4i32 VR128:$src), addr:$dst),
|
||||
(MOVDQAmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
|
||||
|
||||
// Scalar to v8i16 / v16i8. The source may be a GR32, but only the lower 8 or
|
||||
// 16-bits matter.
|
||||
@ -2641,3 +2634,26 @@ def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
|
||||
// Unaligned load
|
||||
def : Pat<(v4f32 (X86loadu addr:$src)), (MOVUPSrm addr:$src)>,
|
||||
Requires<[HasSSE1]>;
|
||||
|
||||
// Use movaps / movups for SSE integer load / store (one byte shorter).
|
||||
def : Pat<(alignedloadv2i64 addr:$src),
|
||||
(MOVAPSrm addr:$src)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(loadv2i64 addr:$src),
|
||||
(MOVUPSrm addr:$src)>, Requires<[HasSSE2]>;
|
||||
|
||||
def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
|
||||
(MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
|
||||
(MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
|
||||
(MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
|
||||
(MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(store (v2i64 VR128:$src), addr:$dst),
|
||||
(MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(store (v4i32 VR128:$src), addr:$dst),
|
||||
(MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(store (v8i16 VR128:$src), addr:$dst),
|
||||
(MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
|
||||
def : Pat<(store (v16i8 VR128:$src), addr:$dst),
|
||||
(MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
|
||||
|
Loading…
Reference in New Issue
Block a user