mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-03-04 05:31:51 +00:00
Make x86 fast-isel correctly choose between aligned and unaligned operations for vector stores. Fixes PR16640.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186491 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
d055c59544
commit
e0364b64d1
@ -79,8 +79,10 @@ private:
|
||||
|
||||
bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
|
||||
|
||||
bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM);
|
||||
bool X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM);
|
||||
bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM,
|
||||
bool Aligned = false);
|
||||
bool X86FastEmitStore(EVT VT, unsigned ValReg, const X86AddressMode &AM,
|
||||
bool Aligned = false);
|
||||
|
||||
bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
|
||||
unsigned &ResultReg);
|
||||
@ -233,7 +235,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
|
||||
/// and a displacement offset, or a GlobalAddress,
|
||||
/// i.e. V. Return true if it is possible.
|
||||
bool
|
||||
X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
|
||||
X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg,
|
||||
const X86AddressMode &AM, bool Aligned) {
|
||||
// Get opcode and regclass of the output for the given store instruction.
|
||||
unsigned Opc = 0;
|
||||
switch (VT.getSimpleVT().SimpleTy) {
|
||||
@ -243,8 +246,8 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
|
||||
// Mask out all but lowest bit.
|
||||
unsigned AndResult = createResultReg(&X86::GR8RegClass);
|
||||
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
|
||||
TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1);
|
||||
Val = AndResult;
|
||||
TII.get(X86::AND8ri), AndResult).addReg(ValReg).addImm(1);
|
||||
ValReg = AndResult;
|
||||
}
|
||||
// FALLTHROUGH, handling i1 as i8.
|
||||
case MVT::i8: Opc = X86::MOV8mr; break;
|
||||
@ -260,26 +263,35 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
|
||||
(Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m;
|
||||
break;
|
||||
case MVT::v4f32:
|
||||
Opc = X86::MOVAPSmr;
|
||||
if (Aligned)
|
||||
Opc = X86::MOVAPSmr;
|
||||
else
|
||||
Opc = X86::MOVUPSmr;
|
||||
break;
|
||||
case MVT::v2f64:
|
||||
Opc = X86::MOVAPDmr;
|
||||
if (Aligned)
|
||||
Opc = X86::MOVAPSmr;
|
||||
else
|
||||
Opc = X86::MOVUPSmr;
|
||||
break;
|
||||
case MVT::v4i32:
|
||||
case MVT::v2i64:
|
||||
case MVT::v8i16:
|
||||
case MVT::v16i8:
|
||||
Opc = X86::MOVDQAmr;
|
||||
if (Aligned)
|
||||
Opc = X86::MOVDQAmr;
|
||||
else
|
||||
Opc = X86::MOVDQUmr;
|
||||
break;
|
||||
}
|
||||
|
||||
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
|
||||
DL, TII.get(Opc)), AM).addReg(Val);
|
||||
DL, TII.get(Opc)), AM).addReg(ValReg);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
|
||||
const X86AddressMode &AM) {
|
||||
const X86AddressMode &AM, bool Aligned) {
|
||||
// Handle 'null' like i32/i64 0.
|
||||
if (isa<ConstantPointerNull>(Val))
|
||||
Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext()));
|
||||
@ -314,7 +326,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
|
||||
if (ValReg == 0)
|
||||
return false;
|
||||
|
||||
return X86FastEmitStore(VT, ValReg, AM);
|
||||
return X86FastEmitStore(VT, ValReg, AM, Aligned);
|
||||
}
|
||||
|
||||
/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
|
||||
@ -688,6 +700,10 @@ bool X86FastISel::X86SelectStore(const Instruction *I) {
|
||||
if (S->isAtomic())
|
||||
return false;
|
||||
|
||||
unsigned SABIAlignment =
|
||||
TD.getABITypeAlignment(S->getValueOperand()->getType());
|
||||
bool Aligned = S->getAlignment() == 0 || S->getAlignment() >= SABIAlignment;
|
||||
|
||||
MVT VT;
|
||||
if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
|
||||
return false;
|
||||
@ -696,7 +712,7 @@ bool X86FastISel::X86SelectStore(const Instruction *I) {
|
||||
if (!X86SelectAddress(I->getOperand(1), AM))
|
||||
return false;
|
||||
|
||||
return X86FastEmitStore(VT, I->getOperand(0), AM);
|
||||
return X86FastEmitStore(VT, I->getOperand(0), AM, Aligned);
|
||||
}
|
||||
|
||||
/// X86SelectRet - Select and emit code to implement ret instructions.
|
||||
|
@ -20,7 +20,7 @@ entry:
|
||||
%2 = load <4 x float>* %p3, align 16
|
||||
%3 = load <4 x float>* %p4, align 16
|
||||
%4 = load <4 x float>* %p5, align 16
|
||||
; CHECK: movaps {{%xmm[0-7]}}, (%esp)
|
||||
; CHECK: movups {{%xmm[0-7]}}, (%esp)
|
||||
; CHECK-NEXT: calll _dovectortest
|
||||
call void @dovectortest(<4 x float> %0, <4 x float> %1, <4 x float> %2, <4 x float> %3, <4 x float> %4)
|
||||
ret void
|
||||
|
48
test/CodeGen/X86/fast-isel-store.ll
Normal file
48
test/CodeGen/X86/fast-isel-store.ll
Normal file
@ -0,0 +1,48 @@
|
||||
; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort -mattr=+sse2 < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=i686-none-linux -fast-isel -fast-isel-abort -mattr=+sse2 < %s | FileCheck %s
|
||||
|
||||
define i32 @test_store_32(i32* nocapture %addr, i32 %value) {
|
||||
entry:
|
||||
store i32 %value, i32* %addr, align 1
|
||||
ret i32 %value
|
||||
}
|
||||
|
||||
; CHECK: ret
|
||||
|
||||
define i16 @test_store_16(i16* nocapture %addr, i16 %value) {
|
||||
entry:
|
||||
store i16 %value, i16* %addr, align 1
|
||||
ret i16 %value
|
||||
}
|
||||
|
||||
; CHECK: ret
|
||||
|
||||
define <4 x i32> @test_store_4xi32(<4 x i32>* nocapture %addr, <4 x i32> %value, <4 x i32> %value2) {
|
||||
; CHECK: movdqu
|
||||
; CHECK: ret
|
||||
%foo = add <4 x i32> %value, %value2 ; to force integer type on store
|
||||
store <4 x i32> %foo, <4 x i32>* %addr, align 1
|
||||
ret <4 x i32> %foo
|
||||
}
|
||||
|
||||
define <4 x i32> @test_store_4xi32_aligned(<4 x i32>* nocapture %addr, <4 x i32> %value, <4 x i32> %value2) {
|
||||
; CHECK: movdqa
|
||||
; CHECK: ret
|
||||
%foo = add <4 x i32> %value, %value2 ; to force integer type on store
|
||||
store <4 x i32> %foo, <4 x i32>* %addr, align 16
|
||||
ret <4 x i32> %foo
|
||||
}
|
||||
|
||||
define <4 x float> @test_store_4xf32(<4 x float>* nocapture %addr, <4 x float> %value) {
|
||||
; CHECK: movups
|
||||
; CHECK: ret
|
||||
store <4 x float> %value, <4 x float>* %addr, align 1
|
||||
ret <4 x float> %value
|
||||
}
|
||||
|
||||
define <4 x float> @test_store_4xf32_aligned(<4 x float>* nocapture %addr, <4 x float> %value) {
|
||||
; CHECK: movaps
|
||||
; CHECK: ret
|
||||
store <4 x float> %value, <4 x float>* %addr, align 16
|
||||
ret <4 x float> %value
|
||||
}
|
@ -1,18 +0,0 @@
|
||||
; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=i686-none-linux -fast-isel -fast-isel-abort < %s | FileCheck %s
|
||||
|
||||
define i32 @test_store_32(i32* nocapture %addr, i32 %value) {
|
||||
entry:
|
||||
store i32 %value, i32* %addr, align 1
|
||||
ret i32 %value
|
||||
}
|
||||
|
||||
; CHECK: ret
|
||||
|
||||
define i16 @test_store_16(i16* nocapture %addr, i16 %value) {
|
||||
entry:
|
||||
store i16 %value, i16* %addr, align 1
|
||||
ret i16 %value
|
||||
}
|
||||
|
||||
; CHECK: ret
|
Loading…
x
Reference in New Issue
Block a user