From 5fc2187a025bb77b9023239edf12868d833630fe Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Thu, 10 May 2012 12:22:05 +0000 Subject: [PATCH] Generate AVX/AVX2 shuffles even when there is a memory op somewhere else in the program. Starting with r155461, we are able to select patterns for vbroadcast even when the load op has other users. Fixes PR11900. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@156539 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 4 ---- test/CodeGen/X86/avx-vbroadcast.ll | 17 +++++++++++++++++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 32fb5ceebeb..4a312f4eb56 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5029,10 +5029,6 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const { if (!ISD::isNormalLoad(Ld.getNode())) return SDValue(); - // Reject loads that have uses of the chain result - if (Ld->hasAnyUseOfValue(1)) - return SDValue(); - unsigned ScalarSize = Ld.getValueType().getSizeInBits(); if (ScalarSize == 32 || (Is256 && ScalarSize == 64)) diff --git a/test/CodeGen/X86/avx-vbroadcast.ll b/test/CodeGen/X86/avx-vbroadcast.ll index 148ae7329f4..26ee1d33a17 100644 --- a/test/CodeGen/X86/avx-vbroadcast.ll +++ b/test/CodeGen/X86/avx-vbroadcast.ll @@ -112,3 +112,20 @@ entry: %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1 ret <2 x double> %vecinit2.i } + +; CHECK: _RR +; CHECK: vbroadcastss (% +; CHECK: ret +define <4 x float> @_RR(float* %ptr, i32* %k) nounwind uwtable readnone ssp { +entry: + %q = load float* %ptr, align 4 + %vecinit.i = insertelement <4 x float> undef, float %q, i32 0 + %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1 + %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2 + %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3 + ; force a chain + 
%j = load i32* %k, align 4 + store i32 %j, i32* undef + ret <4 x float> %vecinit6.i +} +