mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-06-26 07:24:25 +00:00
Fix intrinsics for the XOP vfrczss/vfrczsd instructions. These instructions take only one source register and zero the upper bits of the destination rather than preserving them.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158396 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@ -1920,26 +1920,19 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||||||
llvm_v8f32_ty, llvm_i8_ty],
|
llvm_v8f32_ty, llvm_i8_ty],
|
||||||
[IntrNoMem]>;
|
[IntrNoMem]>;
|
||||||
|
|
||||||
def int_x86_xop_vfrcz_pd :
|
def int_x86_xop_vfrcz_pd : GCCBuiltin<"__builtin_ia32_vfrczpd">,
|
||||||
GCCBuiltin<"__builtin_ia32_vfrczpd">,
|
|
||||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
|
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
|
||||||
def int_x86_xop_vfrcz_ps :
|
def int_x86_xop_vfrcz_ps : GCCBuiltin<"__builtin_ia32_vfrczps">,
|
||||||
GCCBuiltin<"__builtin_ia32_vfrczps">,
|
|
||||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
|
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
|
||||||
def int_x86_xop_vfrcz_sd :
|
def int_x86_xop_vfrcz_sd : GCCBuiltin<"__builtin_ia32_vfrczsd">,
|
||||||
GCCBuiltin<"__builtin_ia32_vfrczsd">,
|
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
|
||||||
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty],
|
def int_x86_xop_vfrcz_ss : GCCBuiltin<"__builtin_ia32_vfrczss">,
|
||||||
[IntrNoMem]>;
|
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
|
||||||
def int_x86_xop_vfrcz_ss :
|
def int_x86_xop_vfrcz_pd_256 : GCCBuiltin<"__builtin_ia32_vfrczpd256">,
|
||||||
GCCBuiltin<"__builtin_ia32_vfrczss">,
|
|
||||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
|
|
||||||
[IntrNoMem]>;
|
|
||||||
def int_x86_xop_vfrcz_pd_256 :
|
|
||||||
GCCBuiltin<"__builtin_ia32_vfrczpd256">,
|
|
||||||
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
|
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
|
||||||
def int_x86_xop_vfrcz_ps_256 :
|
def int_x86_xop_vfrcz_ps_256 : GCCBuiltin<"__builtin_ia32_vfrczps256">,
|
||||||
GCCBuiltin<"__builtin_ia32_vfrczps256">,
|
|
||||||
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
|
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
|
||||||
|
|
||||||
def int_x86_xop_vpcmov :
|
def int_x86_xop_vpcmov :
|
||||||
GCCBuiltin<"__builtin_ia32_vpcmov">,
|
GCCBuiltin<"__builtin_ia32_vpcmov">,
|
||||||
Intrinsic<[llvm_v2i64_ty],
|
Intrinsic<[llvm_v2i64_ty],
|
||||||
|
@ -39,22 +39,16 @@ let isAsmParserOnly = 1 in {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Scalar load 2 addr operand instructions
|
// Scalar load 2 addr operand instructions
|
||||||
let Constraints = "$src1 = $dst" in {
|
|
||||||
multiclass xop2opsld<bits<8> opc, string OpcodeStr, Intrinsic Int,
|
multiclass xop2opsld<bits<8> opc, string OpcodeStr, Intrinsic Int,
|
||||||
Operand memop, ComplexPattern mem_cpat> {
|
Operand memop, ComplexPattern mem_cpat> {
|
||||||
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1,
|
def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||||
VR128:$src2),
|
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
[(set VR128:$dst, (Int VR128:$src))]>, VEX;
|
||||||
[(set VR128:$dst, (Int VR128:$src1, VR128:$src2))]>, VEX;
|
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins memop:$src),
|
||||||
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1,
|
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||||
memop:$src2),
|
[(set VR128:$dst, (Int (bitconvert mem_cpat:$src)))]>, VEX;
|
||||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
|
||||||
[(set VR128:$dst, (Int VR128:$src1,
|
|
||||||
(bitconvert mem_cpat:$src2)))]>, VEX;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // Constraints = "$src1 = $dst"
|
|
||||||
|
|
||||||
let isAsmParserOnly = 1 in {
|
let isAsmParserOnly = 1 in {
|
||||||
defm VFRCZSS : xop2opsld<0x82, "vfrczss", int_x86_xop_vfrcz_ss,
|
defm VFRCZSS : xop2opsld<0x82, "vfrczss", int_x86_xop_vfrcz_ss,
|
||||||
ssmem, sse_load_f32>;
|
ssmem, sse_load_f32>;
|
||||||
|
@ -89,6 +89,19 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
|||||||
if (Name == "x86.sse41.ptestnzc")
|
if (Name == "x86.sse41.ptestnzc")
|
||||||
return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
|
return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
|
||||||
}
|
}
|
||||||
|
// vfrcz.ss/sd now take a single argument; upgrade old two-argument declarations
|
||||||
|
if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) {
|
||||||
|
F->setName(Name + ".old");
|
||||||
|
NewFn = Intrinsic::getDeclaration(F->getParent(),
|
||||||
|
Intrinsic::x86_xop_vfrcz_ss);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (Name.startswith("x86.xop.vfrcz.sd") && F->arg_size() == 2) {
|
||||||
|
F->setName(Name + ".old");
|
||||||
|
NewFn = Intrinsic::getDeclaration(F->getParent(),
|
||||||
|
Intrinsic::x86_xop_vfrcz_sd);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
// Fix the FMA4 intrinsics to remove the 4
|
// Fix the FMA4 intrinsics to remove the 4
|
||||||
if (Name.startswith("x86.fma4.")) {
|
if (Name.startswith("x86.fma4.")) {
|
||||||
F->setName("llvm.x86.fma" + Name.substr(8));
|
F->setName("llvm.x86.fma" + Name.substr(8));
|
||||||
@ -282,9 +295,16 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
|||||||
CI->eraseFromParent();
|
CI->eraseFromParent();
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
case Intrinsic::x86_xop_vfrcz_ss:
|
||||||
|
case Intrinsic::x86_xop_vfrcz_sd:
|
||||||
|
CI->replaceAllUsesWith(Builder.CreateCall(NewFn, CI->getArgOperand(1),
|
||||||
|
Name));
|
||||||
|
CI->eraseFromParent();
|
||||||
|
return;
|
||||||
|
|
||||||
case Intrinsic::x86_sse41_ptestc:
|
case Intrinsic::x86_sse41_ptestc:
|
||||||
case Intrinsic::x86_sse41_ptestz:
|
case Intrinsic::x86_sse41_ptestz:
|
||||||
case Intrinsic::x86_sse41_ptestnzc:
|
case Intrinsic::x86_sse41_ptestnzc: {
|
||||||
// The arguments for these intrinsics used to be v4f32, and changed
|
// The arguments for these intrinsics used to be v4f32, and changed
|
||||||
// to v2i64. This is purely a nop, since those are bitwise intrinsics.
|
// to v2i64. This is purely a nop, since those are bitwise intrinsics.
|
||||||
// So, the only thing required is a bitcast for both arguments.
|
// So, the only thing required is a bitcast for both arguments.
|
||||||
@ -310,6 +330,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
|||||||
CI->eraseFromParent();
|
CI->eraseFromParent();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// This tests each Function to determine if it needs upgrading. When we find
|
// This tests each Function to determine if it needs upgrading. When we find
|
||||||
|
@ -875,37 +875,37 @@ define <8 x i16> @test_int_x86_xop_vpshlw_mr(<8 x i16>* %a0, <8 x i16> %a1) {
|
|||||||
}
|
}
|
||||||
declare <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16>, <8 x i16>) nounwind readnone
|
declare <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16>, <8 x i16>) nounwind readnone
|
||||||
|
|
||||||
define <4 x float> @test_int_x86_xop_vfrcz_ss(<4 x float> %a0, <4 x float> %a1) {
|
define <4 x float> @test_int_x86_xop_vfrcz_ss(<4 x float> %a0) {
|
||||||
; CHECK-NOT: mov
|
; CHECK-NOT: mov
|
||||||
; CHECK: vfrczss
|
; CHECK: vfrczss
|
||||||
%res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0, <4 x float> %a1) ;
|
%res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0) ;
|
||||||
ret <4 x float> %res
|
ret <4 x float> %res
|
||||||
}
|
}
|
||||||
define <4 x float> @test_int_x86_xop_vfrcz_ss_mem(<4 x float> %a0, float* %a1) {
|
define <4 x float> @test_int_x86_xop_vfrcz_ss_mem(float* %a0) {
|
||||||
; CHECK-NOT: mov
|
; CHECK-NOT: mov
|
||||||
; CHECK: vfrczss
|
; CHECK: vfrczss
|
||||||
%elem = load float* %a1
|
%elem = load float* %a0
|
||||||
%vec = insertelement <4 x float> undef, float %elem, i32 0
|
%vec = insertelement <4 x float> undef, float %elem, i32 0
|
||||||
%res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0, <4 x float> %vec) ;
|
%res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %vec) ;
|
||||||
ret <4 x float> %res
|
ret <4 x float> %res
|
||||||
}
|
}
|
||||||
declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>, <4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>) nounwind readnone
|
||||||
|
|
||||||
define <2 x double> @test_int_x86_xop_vfrcz_sd(<2 x double> %a0, <2 x double> %a1) {
|
define <2 x double> @test_int_x86_xop_vfrcz_sd(<2 x double> %a0) {
|
||||||
; CHECK-NOT: mov
|
; CHECK-NOT: mov
|
||||||
; CHECK: vfrczsd
|
; CHECK: vfrczsd
|
||||||
%res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0, <2 x double> %a1) ;
|
%res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0) ;
|
||||||
ret <2 x double> %res
|
ret <2 x double> %res
|
||||||
}
|
}
|
||||||
define <2 x double> @test_int_x86_xop_vfrcz_sd_mem(<2 x double> %a0, double* %a1) {
|
define <2 x double> @test_int_x86_xop_vfrcz_sd_mem(double* %a0) {
|
||||||
; CHECK-NOT: mov
|
; CHECK-NOT: mov
|
||||||
; CHECK: vfrczsd
|
; CHECK: vfrczsd
|
||||||
%elem = load double* %a1
|
%elem = load double* %a0
|
||||||
%vec = insertelement <2 x double> undef, double %elem, i32 0
|
%vec = insertelement <2 x double> undef, double %elem, i32 0
|
||||||
%res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0, <2 x double> %vec) ;
|
%res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %vec) ;
|
||||||
ret <2 x double> %res
|
ret <2 x double> %res
|
||||||
}
|
}
|
||||||
declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>, <2 x double>) nounwind readnone
|
declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>) nounwind readnone
|
||||||
|
|
||||||
define <2 x double> @test_int_x86_xop_vfrcz_pd(<2 x double> %a0) {
|
define <2 x double> @test_int_x86_xop_vfrcz_pd(<2 x double> %a0) {
|
||||||
; CHECK: vfrczpd
|
; CHECK: vfrczpd
|
||||||
|
Reference in New Issue
Block a user