mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-14 11:32:34 +00:00
Remove the unaligned load intrinsics in favor of using native unaligned loads.
Now that we have a first-class way to represent unaligned loads, the unaligned load intrinsics are superfluous. First part of <rdar://problem/8460511>.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@129401 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
0b756349a7
commit
d5f323d70b
@ -138,12 +138,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
llvm_x86mmx_ty], [IntrNoMem]>;
|
||||
}
|
||||
|
||||
// SIMD load ops
|
||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_sse_loadu_ps : GCCBuiltin<"__builtin_ia32_loadups">,
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>;
|
||||
}
|
||||
|
||||
// SIMD store ops
|
||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_sse_storeu_ps : GCCBuiltin<"__builtin_ia32_storeups">,
|
||||
@ -452,14 +446,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
|
||||
}
|
||||
|
||||
// SIMD load ops
|
||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_sse2_loadu_pd : GCCBuiltin<"__builtin_ia32_loadupd">,
|
||||
Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty], [IntrReadMem]>;
|
||||
def int_x86_sse2_loadu_dq : GCCBuiltin<"__builtin_ia32_loaddqu">,
|
||||
Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty], [IntrReadMem]>;
|
||||
}
|
||||
|
||||
// SIMD store ops
|
||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_sse2_storeu_pd : GCCBuiltin<"__builtin_ia32_storeupd">,
|
||||
|
@ -916,7 +916,6 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
|
||||
case X86::MOVSDrm:
|
||||
case X86::MOVAPSrm:
|
||||
case X86::MOVUPSrm:
|
||||
case X86::MOVUPSrm_Int:
|
||||
case X86::MOVAPDrm:
|
||||
case X86::MOVDQArm:
|
||||
case X86::MMX_MOVD64rm:
|
||||
@ -2845,11 +2844,9 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
|
||||
case X86::FsMOVAPDrm:
|
||||
case X86::MOVAPSrm:
|
||||
case X86::MOVUPSrm:
|
||||
case X86::MOVUPSrm_Int:
|
||||
case X86::MOVAPDrm:
|
||||
case X86::MOVDQArm:
|
||||
case X86::MOVDQUrm:
|
||||
case X86::MOVDQUrm_Int:
|
||||
break;
|
||||
}
|
||||
switch (Opc2) {
|
||||
@ -2869,11 +2866,9 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
|
||||
case X86::FsMOVAPDrm:
|
||||
case X86::MOVAPSrm:
|
||||
case X86::MOVUPSrm:
|
||||
case X86::MOVUPSrm_Int:
|
||||
case X86::MOVAPDrm:
|
||||
case X86::MOVDQArm:
|
||||
case X86::MOVDQUrm:
|
||||
case X86::MOVDQUrm_Int:
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -329,15 +329,6 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
|
||||
|
||||
// Intrinsic forms of MOVUPS/D load and store
|
||||
let isAsmParserOnly = 0 in {
|
||||
let canFoldAsLoad = 1, isReMaterializable = 1 in
|
||||
def VMOVUPSrm_Int : VPSI<0x10, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins f128mem:$src),
|
||||
"movups\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>, VEX;
|
||||
def VMOVUPDrm_Int : VPDI<0x10, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins f128mem:$src),
|
||||
"movupd\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>, VEX;
|
||||
def VMOVUPSmr_Int : VPSI<0x11, MRMDestMem, (outs),
|
||||
(ins f128mem:$dst, VR128:$src),
|
||||
"movups\t{$src, $dst|$dst, $src}",
|
||||
@ -347,13 +338,6 @@ let isAsmParserOnly = 0 in {
|
||||
"movupd\t{$src, $dst|$dst, $src}",
|
||||
[(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>, VEX;
|
||||
}
|
||||
let canFoldAsLoad = 1, isReMaterializable = 1 in
|
||||
def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
|
||||
"movups\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>;
|
||||
def MOVUPDrm_Int : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
|
||||
"movupd\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>;
|
||||
|
||||
def MOVUPSmr_Int : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
|
||||
"movups\t{$src, $dst|$dst, $src}",
|
||||
@ -2229,22 +2213,12 @@ def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
|
||||
|
||||
// Intrinsic forms of MOVDQU load and store
|
||||
let isAsmParserOnly = 0 in {
|
||||
let canFoldAsLoad = 1 in
|
||||
def VMOVDQUrm_Int : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
|
||||
"vmovdqu\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst, (int_x86_sse2_loadu_dq addr:$src))]>,
|
||||
XS, VEX, Requires<[HasAVX]>;
|
||||
def VMOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
|
||||
"vmovdqu\t{$src, $dst|$dst, $src}",
|
||||
[(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
|
||||
XS, VEX, Requires<[HasAVX]>;
|
||||
}
|
||||
|
||||
let canFoldAsLoad = 1 in
|
||||
def MOVDQUrm_Int : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
|
||||
"movdqu\t{$src, $dst|$dst, $src}",
|
||||
[(set VR128:$dst, (int_x86_sse2_loadu_dq addr:$src))]>,
|
||||
XS, Requires<[HasSSE2]>;
|
||||
def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
|
||||
"movdqu\t{$src, $dst|$dst, $src}",
|
||||
[(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
|
||||
|
@ -537,11 +537,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
||||
break;
|
||||
case Intrinsic::ppc_altivec_lvx:
|
||||
case Intrinsic::ppc_altivec_lvxl:
|
||||
case Intrinsic::x86_sse_loadu_ps:
|
||||
case Intrinsic::x86_sse2_loadu_pd:
|
||||
case Intrinsic::x86_sse2_loadu_dq:
|
||||
// Turn PPC lvx -> load if the pointer is known aligned.
|
||||
// Turn X86 loadups -> load if the pointer is known aligned.
|
||||
if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) {
|
||||
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
|
||||
PointerType::getUnqual(II->getType()));
|
||||
|
@ -572,9 +572,6 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
|
||||
switch (II->getIntrinsicID()) {
|
||||
default: break;
|
||||
case Intrinsic::prefetch:
|
||||
case Intrinsic::x86_sse2_loadu_dq:
|
||||
case Intrinsic::x86_sse2_loadu_pd:
|
||||
case Intrinsic::x86_sse_loadu_ps:
|
||||
case Intrinsic::x86_sse_storeu_ps:
|
||||
case Intrinsic::x86_sse2_storeu_pd:
|
||||
case Intrinsic::x86_sse2_storeu_dq:
|
||||
|
@ -527,6 +527,12 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
||||
// or 0.
|
||||
NewFn = 0;
|
||||
return true;
|
||||
} else if (Name.compare(5, 16, "x86.sse.loadu.ps", 16) == 0 ||
|
||||
Name.compare(5, 17, "x86.sse2.loadu.dq", 17) == 0 ||
|
||||
Name.compare(5, 17, "x86.sse2.loadu.pd", 17) == 0) {
|
||||
// Calls to these instructions are transformed into unaligned loads.
|
||||
NewFn = 0;
|
||||
return true;
|
||||
} else if (Name.compare(5, 17, "x86.ssse3.pshuf.w", 17) == 0) {
|
||||
// This is an SSE/MMX instruction.
|
||||
const Type *X86_MMXTy = VectorType::getX86_MMXTy(FTy->getContext());
|
||||
@ -947,6 +953,28 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||
// Remove upgraded instruction.
|
||||
CI->eraseFromParent();
|
||||
|
||||
} else if (F->getName() == "llvm.x86.sse.loadu.ps" ||
|
||||
F->getName() == "llvm.x86.sse2.loadu.dq" ||
|
||||
F->getName() == "llvm.x86.sse2.loadu.pd") {
|
||||
// Convert to a native, unaligned load.
|
||||
const Type *VecTy = CI->getType();
|
||||
const Type *IntTy = IntegerType::get(C, 128);
|
||||
IRBuilder<> Builder(C);
|
||||
Builder.SetInsertPoint(CI->getParent(), CI);
|
||||
|
||||
Value *BC = Builder.CreateBitCast(CI->getArgOperand(0),
|
||||
PointerType::getUnqual(IntTy),
|
||||
"cast");
|
||||
LoadInst *LI = Builder.CreateLoad(BC, CI->getName());
|
||||
LI->setAlignment(1); // Unaligned load.
|
||||
BC = Builder.CreateBitCast(LI, VecTy, "new.cast");
|
||||
|
||||
// Fix up all the uses with our new load.
|
||||
if (!CI->use_empty())
|
||||
CI->replaceAllUsesWith(BC);
|
||||
|
||||
// Remove intrinsic.
|
||||
CI->eraseFromParent();
|
||||
} else {
|
||||
llvm_unreachable("Unknown function for CallInst upgrade.");
|
||||
}
|
||||
|
@ -7,6 +7,8 @@
|
||||
; RUN: llvm-as < %s | llvm-dis | \
|
||||
; RUN: not grep {llvm\\.bswap\\.i\[0-9\]*\\.i\[0-9\]*}
|
||||
; RUN: llvm-as < %s | llvm-dis | \
|
||||
; RUN: not grep {llvm\\.x86\\.sse2\\.loadu}
|
||||
; RUN: llvm-as < %s | llvm-dis | \
|
||||
; RUN: grep {llvm\\.x86\\.mmx\\.ps} | grep {x86_mmx} | count 16
|
||||
|
||||
declare i32 @llvm.ctpop.i28(i28 %val)
|
||||
@ -79,3 +81,13 @@ define void @sh64(<1 x i64> %A, <2 x i32> %B) {
|
||||
%r2 = call <1 x i64> @llvm.x86.mmx.psrl.q( <1 x i64> %A, <2 x i32> %B ) ; <<1 x i64>> [#uses=0]
|
||||
ret void
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.x86.sse.loadu.ps(i8*) nounwind readnone
|
||||
declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*) nounwind readnone
|
||||
declare <2 x double> @llvm.x86.sse2.loadu.pd(double*) nounwind readnone
|
||||
define void @test_loadu(i8* %a, double* %b) {
|
||||
%v0 = call <4 x float> @llvm.x86.sse.loadu.ps(i8* %a)
|
||||
%v1 = call <16 x i8> @llvm.x86.sse2.loadu.dq(i8* %a)
|
||||
%v2 = call <2 x double> @llvm.x86.sse2.loadu.pd(double* %b)
|
||||
ret void
|
||||
}
|
||||
|
@ -247,7 +247,7 @@ declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind
|
||||
|
||||
define <16 x i8> @test_x86_sse2_loadu_dq(i8* %a0) {
|
||||
; CHECK: movl
|
||||
; CHECK: vmovdqu
|
||||
; CHECK: vmovups
|
||||
%res = call <16 x i8> @llvm.x86.sse2.loadu.dq(i8* %a0) ; <<16 x i8>> [#uses=1]
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
@ -256,7 +256,7 @@ declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*) nounwind readonly
|
||||
|
||||
define <2 x double> @test_x86_sse2_loadu_pd(i8* %a0) {
|
||||
; CHECK: movl
|
||||
; CHECK: vmovupd
|
||||
; CHECK: vmovups
|
||||
%res = call <2 x double> @llvm.x86.sse2.loadu.pd(i8* %a0) ; <<2 x double>> [#uses=1]
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user