From df24e1fb0812edd2bd33a3b279edb030dd30023d Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Lopes Date: Tue, 13 Sep 2011 19:33:03 +0000 Subject: [PATCH] Add 256-bit versions of alignedstore and alignedload, to be more strict about the alignment checking. This was found by inspection and I don't have any testcases so far, although the llvm testsuite runs without any problem. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@139625 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrFragmentsSIMD.td | 23 +++++++++++++++++------ lib/Target/X86/X86InstrSSE.td | 12 ++++++------ 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index aa088a62c4a..5a093eab52e 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -204,17 +204,28 @@ def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>; def loadv8i32 : PatFrag<(ops node:$ptr), (v8i32 (load node:$ptr))>; def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>; -// Like 'store', but always requires vector alignment. +// Like 'store', but always requires 128-bit vector alignment. def alignedstore : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{ return cast<StoreSDNode>(N)->getAlignment() >= 16; }]>; -// Like 'load', but always requires vector alignment. +// Like 'store', but always requires 256-bit vector alignment. +def alignedstore256 : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() >= 32; +}]>; + +// Like 'load', but always requires 128-bit vector alignment. def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ return cast<LoadSDNode>(N)->getAlignment() >= 16; }]>; +// Like 'load', but always requires 256-bit vector alignment. 
+def alignedload256 : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() >= 32; +}]>; + def alignedloadfsf32 : PatFrag<(ops node:$ptr), (f32 (alignedload node:$ptr))>; def alignedloadfsf64 : PatFrag<(ops node:$ptr), @@ -232,13 +243,13 @@ def alignedloadv2i64 : PatFrag<(ops node:$ptr), // 256-bit aligned load pattern fragments def alignedloadv8f32 : PatFrag<(ops node:$ptr), - (v8f32 (alignedload node:$ptr))>; + (v8f32 (alignedload256 node:$ptr))>; def alignedloadv4f64 : PatFrag<(ops node:$ptr), - (v4f64 (alignedload node:$ptr))>; + (v4f64 (alignedload256 node:$ptr))>; def alignedloadv8i32 : PatFrag<(ops node:$ptr), - (v8i32 (alignedload node:$ptr))>; + (v8i32 (alignedload256 node:$ptr))>; def alignedloadv4i64 : PatFrag<(ops node:$ptr), - (v4i64 (alignedload node:$ptr))>; + (v4i64 (alignedload256 node:$ptr))>; // Like 'load', but uses special alignment checks suitable for use in // memory operands in most SSE instructions, which are required to diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index c5ec346147d..6a082573605 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -717,10 +717,10 @@ def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), [(store (v2f64 VR128:$src), addr:$dst)]>, VEX; def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movaps\t{$src, $dst|$dst, $src}", - [(alignedstore (v8f32 VR256:$src), addr:$dst)]>, VEX; + [(alignedstore256 (v8f32 VR256:$src), addr:$dst)]>, VEX; def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movapd\t{$src, $dst|$dst, $src}", - [(alignedstore (v4f64 VR256:$src), addr:$dst)]>, VEX; + [(alignedstore256 (v4f64 VR256:$src), addr:$dst)]>, VEX; def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movups\t{$src, $dst|$dst, $src}", [(store (v8f32 VR256:$src), addr:$dst)]>, VEX; @@ -872,13 +872,13 @@ let Predicates = [HasAVX] in { 
(VMOVAPSYrm addr:$src)>; def : Pat<(loadv8i32 addr:$src), (VMOVUPSYrm addr:$src)>; - def : Pat<(alignedstore (v4i64 VR256:$src), addr:$dst), + def : Pat<(alignedstore256 (v4i64 VR256:$src), addr:$dst), (VMOVAPSYmr addr:$dst, VR256:$src)>; - def : Pat<(alignedstore (v8i32 VR256:$src), addr:$dst), + def : Pat<(alignedstore256 (v8i32 VR256:$src), addr:$dst), (VMOVAPSYmr addr:$dst, VR256:$src)>; - def : Pat<(alignedstore (v16i16 VR256:$src), addr:$dst), + def : Pat<(alignedstore256 (v16i16 VR256:$src), addr:$dst), (VMOVAPSYmr addr:$dst, VR256:$src)>; - def : Pat<(alignedstore (v32i8 VR256:$src), addr:$dst), + def : Pat<(alignedstore256 (v32i8 VR256:$src), addr:$dst), (VMOVAPSYmr addr:$dst, VR256:$src)>; def : Pat<(store (v4i64 VR256:$src), addr:$dst), (VMOVUPSYmr addr:$dst, VR256:$src)>;