mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-11-02 07:11:49 +00:00
Add AutoUpgrade support for the SSE4 ptest intrinsics.
Patch by Michael Kuperstein. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158295 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
01a90f4f8f
commit
3c98ce242e
@ -25,6 +25,21 @@
|
||||
#include <cstring>
|
||||
using namespace llvm;
|
||||
|
||||
// Upgrade the declarations of the SSE4.1 functions whose arguments have
|
||||
// changed their type from v4f32 to v2i64.
|
||||
static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
|
||||
Function *&NewFn) {
|
||||
// Check whether this is an old version of the function, which received
|
||||
// v4f32 arguments.
|
||||
Type *Arg0Type = F->getFunctionType()->getParamType(0);
|
||||
if (Arg0Type != VectorType::get(Type::getFloatTy(F->getContext()), 4))
|
||||
return false;
|
||||
|
||||
// Yes, it's old, replace it with new version.
|
||||
F->setName(F->getName() + ".old");
|
||||
NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
||||
assert(F && "Illegal to upgrade a non-existent Function.");
|
||||
@ -65,6 +80,15 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
||||
NewFn = 0;
|
||||
return true;
|
||||
}
|
||||
// SSE4.1 ptest functions may have an old signature.
|
||||
if (Name.startswith("x86.sse41.ptest")) {
|
||||
if (Name == "x86.sse41.ptestc")
|
||||
return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
|
||||
if (Name == "x86.sse41.ptestz")
|
||||
return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestz, NewFn);
|
||||
if (Name == "x86.sse41.ptestnzc")
|
||||
return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
|
||||
}
|
||||
// Fix the FMA4 intrinsics to remove the 4
|
||||
if (Name.startswith("x86.fma4.")) {
|
||||
F->setName("llvm.x86.fma" + Name.substr(8));
|
||||
@ -75,9 +99,9 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
||||
}
|
||||
}
|
||||
|
||||
// This may not belong here. This function is effectively being overloaded
|
||||
// to both detect an intrinsic which needs upgrading, and to provide the
|
||||
// upgraded form of the intrinsic. We should perhaps have two separate
|
||||
// This may not belong here. This function is effectively being overloaded
|
||||
// to both detect an intrinsic which needs upgrading, and to provide the
|
||||
// upgraded form of the intrinsic. We should perhaps have two separate
|
||||
// functions for this.
|
||||
return false;
|
||||
}
|
||||
@ -99,8 +123,8 @@ bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call to the
|
||||
// upgraded intrinsic. All argument and return casting must be provided in
|
||||
// UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call to the
|
||||
// upgraded intrinsic. All argument and return casting must be provided in
|
||||
// order to seamlessly integrate with existing context.
|
||||
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||
Function *F = CI->getCalledFunction();
|
||||
@ -242,6 +266,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||
return;
|
||||
}
|
||||
|
||||
StringRef Name = CI->getName();
|
||||
|
||||
switch (NewFn->getIntrinsicID()) {
|
||||
default:
|
||||
llvm_unreachable("Unknown function for CallInst upgrade.");
|
||||
@ -250,12 +276,39 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||
case Intrinsic::cttz:
|
||||
assert(CI->getNumArgOperands() == 1 &&
|
||||
"Mismatch between function args and call args");
|
||||
StringRef Name = CI->getName();
|
||||
CI->setName(Name + ".old");
|
||||
CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0),
|
||||
Builder.getFalse(), Name));
|
||||
CI->eraseFromParent();
|
||||
return;
|
||||
|
||||
case Intrinsic::x86_sse41_ptestc:
|
||||
case Intrinsic::x86_sse41_ptestz:
|
||||
case Intrinsic::x86_sse41_ptestnzc:
|
||||
// The arguments for these intrinsics used to be v4f32, and changed
|
||||
// to v2i64. This is purely a nop, since those are bitwise intrinsics.
|
||||
// So, the only thing required is a bitcast for both arguments.
|
||||
// First, check the arguments have the old type.
|
||||
Value *Arg0 = CI->getArgOperand(0);
|
||||
if (Arg0->getType() != VectorType::get(Type::getFloatTy(C), 4))
|
||||
return;
|
||||
|
||||
// Old intrinsic, add bitcasts
|
||||
Value *Arg1 = CI->getArgOperand(1);
|
||||
|
||||
Value *BC0 =
|
||||
Builder.CreateBitCast(Arg0,
|
||||
VectorType::get(Type::getInt64Ty(C), 2),
|
||||
"cast");
|
||||
Value *BC1 =
|
||||
Builder.CreateBitCast(Arg1,
|
||||
VectorType::get(Type::getInt64Ty(C), 2),
|
||||
"cast");
|
||||
|
||||
CallInst* NewCall = Builder.CreateCall2(NewFn, BC0, BC1, Name);
|
||||
CI->replaceAllUsesWith(NewCall);
|
||||
CI->eraseFromParent();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
22
test/Bitcode/ptest-new.ll
Normal file
22
test/Bitcode/ptest-new.ll
Normal file
@ -0,0 +1,22 @@
|
||||
; RUN: llvm-as < %s | llvm-dis | FileCheck %s
|
||||
|
||||
; Calls each SSE4.1 ptest intrinsic with the current <2 x i64> signature; the
; assemble/disassemble round trip is expected to leave the calls unchanged.
; The three results are summed so that every call feeds the return value.
define i32 @foo(<2 x i64> %bar) nounwind {
entry:
; CHECK: call i32 @llvm.x86.sse41.ptestc(<2 x i64>
  %res1 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %bar, <2 x i64> %bar)
; CHECK: call i32 @llvm.x86.sse41.ptestz(<2 x i64>
  %res2 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %bar, <2 x i64> %bar)
; CHECK: call i32 @llvm.x86.sse41.ptestnzc(<2 x i64>
  %res3 = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %bar, <2 x i64> %bar)
  %add1 = add i32 %res1, %res2
  ; Add %res3 (not %res2 a second time) so the ptestnzc result is used too.
  %add2 = add i32 %add1, %res3
  ret i32 %add2
}
|
||||
|
||||
; CHECK: declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
|
||||
; CHECK: declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
|
||||
; CHECK: declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
|
||||
|
||||
declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
|
||||
declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
|
||||
declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
|
22
test/Bitcode/ptest-old.ll
Normal file
22
test/Bitcode/ptest-old.ll
Normal file
@ -0,0 +1,22 @@
|
||||
; RUN: llvm-as < %s | llvm-dis | FileCheck %s
|
||||
|
||||
; Calls each SSE4.1 ptest intrinsic with the legacy <4 x float> signature; the
; assemble/disassemble round trip is expected to auto-upgrade every call to the
; <2 x i64> form. The three results are summed so that every call feeds the
; return value.
define i32 @foo(<4 x float> %bar) nounwind {
entry:
; CHECK: call i32 @llvm.x86.sse41.ptestc(<2 x i64>
  %res1 = call i32 @llvm.x86.sse41.ptestc(<4 x float> %bar, <4 x float> %bar)
; CHECK: call i32 @llvm.x86.sse41.ptestz(<2 x i64>
  %res2 = call i32 @llvm.x86.sse41.ptestz(<4 x float> %bar, <4 x float> %bar)
; CHECK: call i32 @llvm.x86.sse41.ptestnzc(<2 x i64>
  %res3 = call i32 @llvm.x86.sse41.ptestnzc(<4 x float> %bar, <4 x float> %bar)
  %add1 = add i32 %res1, %res2
  ; Add %res3 (not %res2 a second time) so the ptestnzc result is used too.
  %add2 = add i32 %add1, %res3
  ret i32 %add2
}
|
||||
|
||||
; CHECK: declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
|
||||
; CHECK: declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
|
||||
; CHECK: declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
|
||||
|
||||
declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone
|
||||
declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone
|
||||
declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone
|
Loading…
Reference in New Issue
Block a user