diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td index 4d26f0f678b..fe53b0aaf80 100644 --- a/include/llvm/IntrinsicsX86.td +++ b/include/llvm/IntrinsicsX86.td @@ -1920,26 +1920,19 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_xop_vfrcz_pd : - GCCBuiltin<"__builtin_ia32_vfrczpd">, + def int_x86_xop_vfrcz_pd : GCCBuiltin<"__builtin_ia32_vfrczpd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; - def int_x86_xop_vfrcz_ps : - GCCBuiltin<"__builtin_ia32_vfrczps">, + def int_x86_xop_vfrcz_ps : GCCBuiltin<"__builtin_ia32_vfrczps">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; - def int_x86_xop_vfrcz_sd : - GCCBuiltin<"__builtin_ia32_vfrczsd">, - Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], - [IntrNoMem]>; - def int_x86_xop_vfrcz_ss : - GCCBuiltin<"__builtin_ia32_vfrczss">, - Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], - [IntrNoMem]>; - def int_x86_xop_vfrcz_pd_256 : - GCCBuiltin<"__builtin_ia32_vfrczpd256">, + def int_x86_xop_vfrcz_sd : GCCBuiltin<"__builtin_ia32_vfrczsd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_xop_vfrcz_ss : GCCBuiltin<"__builtin_ia32_vfrczss">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_xop_vfrcz_pd_256 : GCCBuiltin<"__builtin_ia32_vfrczpd256">, Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; - def int_x86_xop_vfrcz_ps_256 : - GCCBuiltin<"__builtin_ia32_vfrczps256">, + def int_x86_xop_vfrcz_ps_256 : GCCBuiltin<"__builtin_ia32_vfrczps256">, Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; + def int_x86_xop_vpcmov : GCCBuiltin<"__builtin_ia32_vpcmov">, Intrinsic<[llvm_v2i64_ty], diff --git a/lib/Target/X86/X86InstrXOP.td b/lib/Target/X86/X86InstrXOP.td index 16a7a1a88a3..8ec2c688d33 100644 --- a/lib/Target/X86/X86InstrXOP.td +++ b/lib/Target/X86/X86InstrXOP.td @@ -39,22 +39,16 @@ let isAsmParserOnly = 1 in { } // Scalar load 2 addr operand instructions -let Constraints = "$src1 = $dst" in { multiclass xop2opsld<bits<8> opc, string OpcodeStr, Intrinsic Int, Operand memop, ComplexPattern mem_cpat> { - def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, - VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (Int VR128:$src1, VR128:$src2))]>, VEX; - def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, - memop:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (Int VR128:$src1, - (bitconvert mem_cpat:$src2)))]>, VEX; + def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, (Int VR128:$src))]>, VEX; + def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins memop:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, (Int (bitconvert mem_cpat:$src)))]>, VEX; } -} // Constraints = "$src1 = $dst" - let isAsmParserOnly = 1 in { defm VFRCZSS : xop2opsld<0x82, "vfrczss", int_x86_xop_vfrcz_ss, ssmem, sse_load_f32>; diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp index 4976c22ad53..5c2d63bc7c9 100644 --- a/lib/VMCore/AutoUpgrade.cpp +++ b/lib/VMCore/AutoUpgrade.cpp @@ -89,6 +89,19 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { if (Name == "x86.sse41.ptestnzc") return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestnzc, NewFn); } + // frcz.ss/sd may need to have an argument dropped + if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) { + F->setName(Name + ".old"); + NewFn = Intrinsic::getDeclaration(F->getParent(), + Intrinsic::x86_xop_vfrcz_ss); + return true; + } + if (Name.startswith("x86.xop.vfrcz.sd") && F->arg_size() == 2) { + F->setName(Name + ".old"); + NewFn = Intrinsic::getDeclaration(F->getParent(), + Intrinsic::x86_xop_vfrcz_sd); + return true; + } // Fix the FMA4 intrinsics to remove the 4 if (Name.startswith("x86.fma4.")) { F->setName("llvm.x86.fma" + Name.substr(8)); @@ -282,9 +295,16 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { CI->eraseFromParent(); return; + case Intrinsic::x86_xop_vfrcz_ss: + case Intrinsic::x86_xop_vfrcz_sd: + CI->replaceAllUsesWith(Builder.CreateCall(NewFn, CI->getArgOperand(1), + Name)); + CI->eraseFromParent(); + return; + case Intrinsic::x86_sse41_ptestc: case Intrinsic::x86_sse41_ptestz: - case Intrinsic::x86_sse41_ptestnzc: + case Intrinsic::x86_sse41_ptestnzc: { // The arguments for these intrinsics used to be v4f32, and changed // to v2i64. This is purely a nop, since those are bitwise intrinsics. // So, the only thing required is a bitcast for both arguments. @@ -310,6 +330,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { CI->eraseFromParent(); return; } + } } // This tests each Function to determine if it needs upgrading. When we find diff --git a/test/CodeGen/X86/xop-intrinsics-x86_64.ll b/test/CodeGen/X86/xop-intrinsics-x86_64.ll index 1dc3f81eb55..8af782cd2f1 100644 --- a/test/CodeGen/X86/xop-intrinsics-x86_64.ll +++ b/test/CodeGen/X86/xop-intrinsics-x86_64.ll @@ -875,37 +875,37 @@ define <8 x i16> @test_int_x86_xop_vpshlw_mr(<8 x i16>* %a0, <8 x i16> %a1) { } declare <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16>, <8 x i16>) nounwind readnone -define <4 x float> @test_int_x86_xop_vfrcz_ss(<4 x float> %a0, <4 x float> %a1) { +define <4 x float> @test_int_x86_xop_vfrcz_ss(<4 x float> %a0) { ; CHECK-NOT: mov ; CHECK: vfrczss - %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0, <4 x float> %a1) ; + %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0) ; ret <4 x float> %res } -define <4 x float> @test_int_x86_xop_vfrcz_ss_mem(<4 x float> %a0, float* %a1) { +define <4 x float> @test_int_x86_xop_vfrcz_ss_mem(float* %a0) { ; CHECK-NOT: mov ; CHECK: vfrczss - %elem = load float* %a1 + %elem = load float* %a0 %vec = insertelement <4 x float> undef, float %elem, i32 0 - %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0, <4 x float> %vec) ; + %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %vec) ; ret <4 x float> %res } -declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>, <4 x float>) nounwind readnone +declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>) nounwind readnone -define <2 x double> @test_int_x86_xop_vfrcz_sd(<2 x double> %a0, <2 x double> %a1) { +define <2 x double> @test_int_x86_xop_vfrcz_sd(<2 x double> %a0) { ; CHECK-NOT: mov ; CHECK: vfrczsd - %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0, <2 x double> %a1) ; + %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0) ; ret <2 x double> %res } -define <2 x double> @test_int_x86_xop_vfrcz_sd_mem(<2 x double> %a0, double* %a1) { +define <2 x double> @test_int_x86_xop_vfrcz_sd_mem(double* %a0) { ; CHECK-NOT: mov ; CHECK: vfrczsd - %elem = load double* %a1 + %elem = load double* %a0 %vec = insertelement <2 x double> undef, double %elem, i32 0 - %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0, <2 x double> %vec) ; + %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %vec) ; ret <2 x double> %res } -declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>, <2 x double>) nounwind readnone +declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>) nounwind readnone define <2 x double> @test_int_x86_xop_vfrcz_pd(<2 x double> %a0) { ; CHECK: vfrczpd