mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-20 14:29:27 +00:00
Add AutoUpgrade support for the SSE4 ptest intrinsics.
Patch by Michael Kuperstein. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158295 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
01a90f4f8f
commit
3c98ce242e
@ -25,6 +25,21 @@
|
|||||||
#include <cstring>
|
#include <cstring>
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
|
|
||||||
|
// Upgrade the declarations of the SSE4.1 functions whose arguments have
|
||||||
|
// changed their type from v4f32 to v2i64.
|
||||||
|
static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
|
||||||
|
Function *&NewFn) {
|
||||||
|
// Check whether this is an old version of the function, which received
|
||||||
|
// v4f32 arguments.
|
||||||
|
Type *Arg0Type = F->getFunctionType()->getParamType(0);
|
||||||
|
if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Yes, it's old, replace it with new version.
|
||||||
|
F->setName(F->getName() + ".old");
|
||||||
|
NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
||||||
assert(F && "Illegal to upgrade a non-existent Function.");
|
assert(F && "Illegal to upgrade a non-existent Function.");
|
||||||
@ -65,6 +80,15 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
|||||||
NewFn = 0;
|
NewFn = 0;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
// SSE4.1 ptest functions may have an old signature.
|
||||||
|
if (Name.startswith("x86.sse41.ptest")) {
|
||||||
|
if (Name == "x86.sse41.ptestc")
|
||||||
|
return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
|
||||||
|
if (Name == "x86.sse41.ptestz")
|
||||||
|
return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
|
||||||
|
if (Name == "x86.sse41.ptestnzc")
|
||||||
|
return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
|
||||||
|
}
|
||||||
// Fix the FMA4 intrinsics to remove the 4
|
// Fix the FMA4 intrinsics to remove the 4
|
||||||
if (Name.startswith("x86.fma4.")) {
|
if (Name.startswith("x86.fma4.")) {
|
||||||
F->setName("llvm.x86.fma" + Name.substr(8));
|
F->setName("llvm.x86.fma" + Name.substr(8));
|
||||||
@ -242,6 +266,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
StringRef Name = CI->getName();
|
||||||
|
|
||||||
switch (NewFn->getIntrinsicID()) {
|
switch (NewFn->getIntrinsicID()) {
|
||||||
default:
|
default:
|
||||||
llvm_unreachable("Unknown function for CallInst upgrade.");
|
llvm_unreachable("Unknown function for CallInst upgrade.");
|
||||||
@ -250,12 +276,39 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
|||||||
case Intrinsic::cttz:
|
case Intrinsic::cttz:
|
||||||
assert(CI->getNumArgOperands() == 1 &&
|
assert(CI->getNumArgOperands() == 1 &&
|
||||||
"Mismatch between function args and call args");
|
"Mismatch between function args and call args");
|
||||||
StringRef Name = CI->getName();
|
|
||||||
CI->setName(Name + ".old");
|
CI->setName(Name + ".old");
|
||||||
CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0),
|
CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0),
|
||||||
Builder.getFalse(), Name));
|
Builder.getFalse(), Name));
|
||||||
CI->eraseFromParent();
|
CI->eraseFromParent();
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
case Intrinsic::x86_sse41_ptestc:
|
||||||
|
case Intrinsic::x86_sse41_ptestz:
|
||||||
|
case Intrinsic::x86_sse41_ptestnzc:
|
||||||
|
// The arguments for these intrinsics used to be v4f32, and changed
|
||||||
|
// to v2i64. This is purely a nop, since those are bitwise intrinsics.
|
||||||
|
// So, the only thing required is a bitcast for both arguments.
|
||||||
|
// First, check the arguments have the old type.
|
||||||
|
Value *Arg0 = CI->getArgOperand(0);
|
||||||
|
if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
|
||||||
|
return;
|
||||||
|
|
||||||
|
// Old intrinsic, add bitcasts
|
||||||
|
Value *Arg1 = CI->getArgOperand(1);
|
||||||
|
|
||||||
|
Value *BC0 =
|
||||||
|
Builder.CreateBitCast(Arg0,
|
||||||
|
VectorType::get(Type::getInt64Ty(C), 2),
|
||||||
|
"cast");
|
||||||
|
Value *BC1 =
|
||||||
|
Builder.CreateBitCast(Arg1,
|
||||||
|
VectorType::get(Type::getInt64Ty(C), 2),
|
||||||
|
"cast");
|
||||||
|
|
||||||
|
CallInst* NewCall = Builder.CreateCall2(NewFn, BC0, BC1, Name);
|
||||||
|
CI->replaceAllUsesWith(NewCall);
|
||||||
|
CI->eraseFromParent();
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
22
test/Bitcode/ptest-new.ll
Normal file
22
test/Bitcode/ptest-new.ll
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
; RUN: llvm-as < %s | llvm-dis | FileCheck %s
|
||||||
|
|
||||||
|
; Calls each of the three SSE4.1 ptest intrinsics with the <2 x i64> operand
; type and returns the sum of all three results, so that every call has a use.
define i32 @foo(<2 x i64> %bar) nounwind {
entry:
; CHECK: call i32 @llvm.x86.sse41.ptestc(<2 x i64>
  %res1 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %bar, <2 x i64> %bar)
; CHECK: call i32 @llvm.x86.sse41.ptestz(<2 x i64>
  %res2 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %bar, <2 x i64> %bar)
; CHECK: call i32 @llvm.x86.sse41.ptestnzc(<2 x i64>
  %res3 = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %bar, <2 x i64> %bar)
  %add1 = add i32 %res1, %res2
  ; Fold in the ptestnzc result; previously %res2 was added a second time
  ; and %res3 was left unused.
  %add2 = add i32 %add1, %res3
  ret i32 %add2
}
|
||||||
|
|
||||||
|
; CHECK: declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
|
||||||
|
; CHECK: declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
|
||||||
|
; CHECK: declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
|
||||||
|
|
||||||
|
declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
|
||||||
|
declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
|
||||||
|
declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
|
22
test/Bitcode/ptest-old.ll
Normal file
22
test/Bitcode/ptest-old.ll
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
; RUN: llvm-as < %s | llvm-dis | FileCheck %s
|
||||||
|
|
||||||
|
; Calls each of the three SSE4.1 ptest intrinsics through the old
; <4 x float> signature and returns the sum of all three results, so that
; every call has a use. The patterns below expect the calls to come back
; from the llvm-as / llvm-dis round trip with <2 x i64> operands.
define i32 @foo(<4 x float> %bar) nounwind {
entry:
; CHECK: call i32 @llvm.x86.sse41.ptestc(<2 x i64>
  %res1 = call i32 @llvm.x86.sse41.ptestc(<4 x float> %bar, <4 x float> %bar)
; CHECK: call i32 @llvm.x86.sse41.ptestz(<2 x i64>
  %res2 = call i32 @llvm.x86.sse41.ptestz(<4 x float> %bar, <4 x float> %bar)
; CHECK: call i32 @llvm.x86.sse41.ptestnzc(<2 x i64>
  %res3 = call i32 @llvm.x86.sse41.ptestnzc(<4 x float> %bar, <4 x float> %bar)
  %add1 = add i32 %res1, %res2
  ; Fold in the ptestnzc result; previously %res2 was added a second time
  ; and %res3 was left unused.
  %add2 = add i32 %add1, %res3
  ret i32 %add2
}
|
||||||
|
|
||||||
|
; CHECK: declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
|
||||||
|
; CHECK: declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
|
||||||
|
; CHECK: declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
|
||||||
|
|
||||||
|
declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone
|
||||||
|
declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone
|
||||||
|
declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone
|
Loading…
x
Reference in New Issue
Block a user