mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-19 06:31:18 +00:00
On Haswell, prefer storing YMM registers using a single instruction.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@157129 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
4fc8a5de44
commit
87d35e8c71
@ -14532,13 +14532,12 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
|
|||||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||||
|
|
||||||
// If we are saving a concatenation of two XMM registers, perform two stores.
|
// If we are saving a concatenation of two XMM registers, perform two stores.
|
||||||
// This is better in Sandy Bridge cause one 256-bit mem op is done via two
|
// On Sandy Bridge, 256-bit memory operations are executed by two
|
||||||
// 128-bit ones. If in the future the cost becomes only one memory access the
|
// 128-bit ports. However, on Haswell it is better to issue a single 256-bit
|
||||||
// first version would be better.
|
// memory operation.
|
||||||
if (VT.getSizeInBits() == 256 &&
|
if (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2() &&
|
||||||
StoredVal.getNode()->getOpcode() == ISD::CONCAT_VECTORS &&
|
StoredVal.getNode()->getOpcode() == ISD::CONCAT_VECTORS &&
|
||||||
StoredVal.getNumOperands() == 2) {
|
StoredVal.getNumOperands() == 2) {
|
||||||
|
|
||||||
SDValue Value0 = StoredVal.getOperand(0);
|
SDValue Value0 = StoredVal.getOperand(0);
|
||||||
SDValue Value1 = StoredVal.getOperand(1);
|
SDValue Value1 = StoredVal.getOperand(1);
|
||||||
|
|
||||||
|
14
test/CodeGen/X86/2012-05-19-avx2-store.ll
Normal file
14
test/CodeGen/X86/2012-05-19-avx2-store.ll
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx2 | FileCheck %s
|
||||||
|
|
||||||
|
define void @double_save(<4 x i32>* %Ap, <4 x i32>* %Bp, <8 x i32>* %P) nounwind ssp {
|
||||||
|
entry:
|
||||||
|
; CHECK: vmovaps
|
||||||
|
; CHECK: vmovaps
|
||||||
|
; CHECK: vinsertf128
|
||||||
|
; CHECK: vmovups
|
||||||
|
%A = load <4 x i32>* %Ap
|
||||||
|
%B = load <4 x i32>* %Bp
|
||||||
|
%Z = shufflevector <4 x i32>%A, <4 x i32>%B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||||
|
store <8 x i32> %Z, <8 x i32>* %P, align 16
|
||||||
|
ret void
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user