From cc95b57d42a4af1cbb0a0e4a4efc2133116dd21c Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 13 Jun 2012 07:18:53 +0000 Subject: [PATCH] Fix intrinsics for XOP frczss/sd instructions. These instructions only take one source register and zero the upper bits of the destination rather than preserving them. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158396 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IntrinsicsX86.td | 25 ++++++++--------------- lib/Target/X86/X86InstrXOP.td | 18 ++++++---------- lib/VMCore/AutoUpgrade.cpp | 23 ++++++++++++++++++++- test/CodeGen/X86/xop-intrinsics-x86_64.ll | 24 +++++++++++----------- 4 files changed, 49 insertions(+), 41 deletions(-) diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td index 4d26f0f678b..fe53b0aaf80 100644 --- a/include/llvm/IntrinsicsX86.td +++ b/include/llvm/IntrinsicsX86.td @@ -1920,26 +1920,19 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_xop_vfrcz_pd : - GCCBuiltin<"__builtin_ia32_vfrczpd">, + def int_x86_xop_vfrcz_pd : GCCBuiltin<"__builtin_ia32_vfrczpd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; - def int_x86_xop_vfrcz_ps : - GCCBuiltin<"__builtin_ia32_vfrczps">, + def int_x86_xop_vfrcz_ps : GCCBuiltin<"__builtin_ia32_vfrczps">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; - def int_x86_xop_vfrcz_sd : - GCCBuiltin<"__builtin_ia32_vfrczsd">, - Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], - [IntrNoMem]>; - def int_x86_xop_vfrcz_ss : - GCCBuiltin<"__builtin_ia32_vfrczss">, - Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], - [IntrNoMem]>; - def int_x86_xop_vfrcz_pd_256 : - GCCBuiltin<"__builtin_ia32_vfrczpd256">, + def int_x86_xop_vfrcz_sd : GCCBuiltin<"__builtin_ia32_vfrczsd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_xop_vfrcz_ss : GCCBuiltin<"__builtin_ia32_vfrczss">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_xop_vfrcz_pd_256 : GCCBuiltin<"__builtin_ia32_vfrczpd256">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; - def int_x86_xop_vfrcz_ps_256 : - GCCBuiltin<"__builtin_ia32_vfrczps256">, + def int_x86_xop_vfrcz_ps_256 : GCCBuiltin<"__builtin_ia32_vfrczps256">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; + def int_x86_xop_vpcmov : GCCBuiltin<"__builtin_ia32_vpcmov">, Intrinsic<[llvm_v2i64_ty], diff --git a/lib/Target/X86/X86InstrXOP.td b/lib/Target/X86/X86InstrXOP.td index 16a7a1a88a3..8ec2c688d33 100644 --- a/lib/Target/X86/X86InstrXOP.td +++ b/lib/Target/X86/X86InstrXOP.td @@ -39,22 +39,16 @@ let isAsmParserOnly = 1 in { } // Scalar load 2 addr operand instructions -let Constraints = "$src1 = $dst" in { multiclass xop2opsld opc, string OpcodeStr, Intrinsic Int, Operand memop, ComplexPattern mem_cpat> { - def rr : IXOP, VEX; - def rm : IXOP, VEX; + def rr : IXOP, VEX; + def rm : IXOP, VEX; } -} // Constraints = "$src1 = $dst" - let isAsmParserOnly = 1 in { defm VFRCZSS : xop2opsld<0x82, "vfrczss", int_x86_xop_vfrcz_ss, ssmem, sse_load_f32>; diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp index 4976c22ad53..5c2d63bc7c9 100644 --- a/lib/VMCore/AutoUpgrade.cpp +++ b/lib/VMCore/AutoUpgrade.cpp @@ -89,6 +89,19 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { if (Name == "x86.sse41.ptestnzc") return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn); } + // frcz.ss/sd may need to have an argument dropped + if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) { + F->setName(Name + ".old"); + NewFn = Intrinsic::getDeclaration(F->getParent(), + Intrinsic::x86_xop_vfrcz_ss); + return true; + } + if (Name.startswith("x86.xop.vfrcz.sd") && F->arg_size() == 2) { + F->setName(Name + ".old"); + NewFn = Intrinsic::getDeclaration(F->getParent(), + Intrinsic::x86_xop_vfrcz_sd); + return true; + } // Fix the FMA4 intrinsics to remove the 4 if (Name.startswith("x86.fma4.")) { F->setName("llvm.x86.fma" + Name.substr(8)); @@ -282,9 +295,16 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { CI->eraseFromParent(); return; + case Intrinsic::x86_xop_vfrcz_ss: + case Intrinsic::x86_xop_vfrcz_sd: + CI->replaceAllUsesWith(Builder.CreateCall(NewFn, CI->getArgOperand(1), + Name)); + CI->eraseFromParent(); + return; + case Intrinsic::x86_sse41_ptestc: case Intrinsic::x86_sse41_ptestz: - case Intrinsic::x86_sse41_ptestnzc: + case Intrinsic::x86_sse41_ptestnzc: { // The arguments for these intrinsics used to be v4f32, and changed // to v2i64. This is purely a nop, since those are bitwise intrinsics. // So, the only thing required is a bitcast for both arguments. @@ -310,6 +330,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { CI->eraseFromParent(); return; } + } } // This tests each Function to determine if it needs upgrading. When we find diff --git a/test/CodeGen/X86/xop-intrinsics-x86_64.ll b/test/CodeGen/X86/xop-intrinsics-x86_64.ll index 1dc3f81eb55..8af782cd2f1 100644 --- a/test/CodeGen/X86/xop-intrinsics-x86_64.ll +++ b/test/CodeGen/X86/xop-intrinsics-x86_64.ll @@ -875,37 +875,37 @@ define <8 x i16> @test_int_x86_xop_vpshlw_mr(<8 x i16>* %a0, <8 x i16> %a1) { } declare <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16>, <8 x i16>) nounwind readnone -define <4 x float> @test_int_x86_xop_vfrcz_ss(<4 x float> %a0, <4 x float> %a1) { +define <4 x float> @test_int_x86_xop_vfrcz_ss(<4 x float> %a0) { ; CHECK-NOT: mov ; CHECK: vfrczss - %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0, <4 x float> %a1) ; + %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0) ; ret <4 x float> %res } -define <4 x float> @test_int_x86_xop_vfrcz_ss_mem(<4 x float> %a0, float* %a1) { +define <4 x float> @test_int_x86_xop_vfrcz_ss_mem(float* %a0) { ; CHECK-NOT: mov ; CHECK: vfrczss - %elem = load float* %a1 + %elem = load float* %a0 %vec = insertelement <4 x float> undef, float %elem, i32 0 - %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0, <4 x float> %vec) ; + %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %vec) ; ret <4 x float> %res } -declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>, <4 x float>) nounwind readnone +declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>) nounwind readnone -define <2 x double> @test_int_x86_xop_vfrcz_sd(<2 x double> %a0, <2 x double> %a1) { +define <2 x double> @test_int_x86_xop_vfrcz_sd(<2 x double> %a0) { ; CHECK-NOT: mov ; CHECK: vfrczsd - %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0, <2 x double> %a1) ; + %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0) ; ret <2 x double> %res } -define <2 x double> @test_int_x86_xop_vfrcz_sd_mem(<2 x double> %a0, double* %a1) { +define <2 x double> @test_int_x86_xop_vfrcz_sd_mem(double* %a0) { ; CHECK-NOT: mov ; CHECK: vfrczsd - %elem = load double* %a1 + %elem = load double* %a0 %vec = insertelement <2 x double> undef, double %elem, i32 0 - %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0, <2 x double> %vec) ; + %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %vec) ; ret <2 x double> %res } -declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>, <2 x double>) nounwind readnone +declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>) nounwind readnone define <2 x double> @test_int_x86_xop_vfrcz_pd(<2 x double> %a0) { ; CHECK: vfrczpd