Auto-upgrade the SSE2 two-element FP shuffle intrinsics to shufflevector.

Summary of what the hunks below do:
  * IntrinsicsX86.td / X86InstrSSE.td: drop the int_x86_sse2_movs_d,
    int_x86_sse2_shuf_pd, int_x86_sse2_unpckh_pd and int_x86_sse2_unpckl_pd
    definitions and the selection patterns that referenced them.
  * AutoUpgrade.cpp: recognise the four old intrinsic names and rewrite each
    call as a ShuffleVectorInst over the call's two vector operands:
        movs.d     -> mask <2, 1>
        unpckh.pd  -> mask <1, 3>
        unpckl.pd  -> mask <0, 2>
        shuf.pd    -> mask <imm & 1, ((imm >> 1) & 1) + 2>
  * test/Bitcode: one regression test per upgraded intrinsic family.  The
    "not grep" patterns match the full intrinsic name (llvm.x86.sse2.*) so the
    check fails if a call to the old intrinsic survives the upgrade.

NOTE(review): leading whitespace inside the hunks was reconstructed; apply
with whitespace-insensitive matching if a context line fails to match.  The
"index" lines keep the abbreviated blob ids as originally recorded.

diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td
index 8af50c152fb..47c8e197170 100644
--- a/include/llvm/IntrinsicsX86.td
+++ b/include/llvm/IntrinsicsX86.td
@@ -504,18 +504,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
 // Shuffles.
 // FIXME: Temporary workarounds since 2-wide shuffle is broken.
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
-  def int_x86_sse2_movs_d : GCCBuiltin<"__builtin_ia32_movsd">,
-              Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty,
-                         llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_sse2_shuf_pd : GCCBuiltin<"__builtin_ia32_shufpd">,
-              Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty,
-                         llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
-  def int_x86_sse2_unpckh_pd : GCCBuiltin<"__builtin_ia32_unpckhpd">,
-              Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty,
-                         llvm_v2f64_ty], [IntrNoMem]>;
-  def int_x86_sse2_unpckl_pd : GCCBuiltin<"__builtin_ia32_unpcklpd">,
-              Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty,
-                         llvm_v2f64_ty], [IntrNoMem]>;
   def int_x86_sse2_punpckh_qdq : GCCBuiltin<"__builtin_ia32_punpckhqdq128">,
               Intrinsic<[llvm_v2i64_ty, llvm_v2i64_ty,
                          llvm_v2i64_ty], [IntrNoMem]>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 6f4eede2ecf..8e6d9df3827 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3051,22 +3051,6 @@ def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
           (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
 
 // FIXME: Temporary workaround since 2-wide shuffle is broken.
-def : Pat<(int_x86_sse2_movs_d VR128:$src1, VR128:$src2),
-          (v2f64 (MOVLPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
-def : Pat<(int_x86_sse2_shuf_pd VR128:$src1, VR128:$src2, imm:$src3),
-          (v2f64 (SHUFPDrri VR128:$src1, VR128:$src2, imm:$src3))>,
-      Requires<[HasSSE2]>;
-def : Pat<(int_x86_sse2_shuf_pd VR128:$src1, (memop addr:$src2),imm:$src3),
-          (v2f64 (SHUFPDrmi VR128:$src1, addr:$src2, imm:$src3))>,
-      Requires<[HasSSE2]>;
-def : Pat<(int_x86_sse2_unpckh_pd VR128:$src1, VR128:$src2),
-          (v2f64 (UNPCKHPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
-def : Pat<(int_x86_sse2_unpckh_pd VR128:$src1, (memop addr:$src2)),
-          (v2f64 (UNPCKHPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
-def : Pat<(int_x86_sse2_unpckl_pd VR128:$src1, VR128:$src2),
-          (v2f64 (UNPCKLPDrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
-def : Pat<(int_x86_sse2_unpckl_pd VR128:$src1, (memop addr:$src2)),
-          (v2f64 (UNPCKLPDrm VR128:$src1, addr:$src2))>, Requires<[HasSSE2]>;
 def : Pat<(int_x86_sse2_punpckh_qdq VR128:$src1, VR128:$src2),
           (v2i64 (PUNPCKHQDQrr VR128:$src1, VR128:$src2))>, Requires<[HasSSE2]>;
 def : Pat<(int_x86_sse2_punpckh_qdq VR128:$src1, (memop addr:$src2)),
diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp
index 38146390623..0d6ae43d0f0 100644
--- a/lib/VMCore/AutoUpgrade.cpp
+++ b/lib/VMCore/AutoUpgrade.cpp
@@ -150,7 +150,11 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
       return true;
     } else if (Name.compare(5,17,"x86.sse2.loadh.pd",17) == 0 ||
                Name.compare(5,17,"x86.sse2.loadl.pd",17) == 0 ||
-               Name.compare(5,16,"x86.sse2.movl.dq",16) == 0) {
+               Name.compare(5,16,"x86.sse2.movl.dq",16) == 0 ||
+               Name.compare(5,15,"x86.sse2.movs.d",15) == 0 ||
+               Name.compare(5,16,"x86.sse2.shuf.pd",16) == 0 ||
+               Name.compare(5,18,"x86.sse2.unpckh.pd",18) == 0 ||
+               Name.compare(5,18,"x86.sse2.unpckl.pd",18) == 0 ) {
       // Calls to these intrinsics are transformed into ShuffleVector's.
       NewFn = 0;
       return true;
@@ -187,17 +191,28 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
 
   if (!NewFn) {
     bool isLoadH = false, isLoadL = false, isMovL = false;
+    bool isMovSD = false, isShufPD = false;
+    bool isUnpckhPD = false, isUnpcklPD = false;
     if (strcmp(F->getNameStart(), "llvm.x86.sse2.loadh.pd") == 0)
       isLoadH = true;
     else if (strcmp(F->getNameStart(), "llvm.x86.sse2.loadl.pd") == 0)
       isLoadL = true;
     else if (strcmp(F->getNameStart(), "llvm.x86.sse2.movl.dq") == 0)
       isMovL = true;
+    else if (strcmp(F->getNameStart(), "llvm.x86.sse2.movs.d") == 0)
+      isMovSD = true;
+    else if (strcmp(F->getNameStart(), "llvm.x86.sse2.shuf.pd") == 0)
+      isShufPD = true;
+    else if (strcmp(F->getNameStart(), "llvm.x86.sse2.unpckh.pd") == 0)
+      isUnpckhPD = true;
+    else if (strcmp(F->getNameStart(), "llvm.x86.sse2.unpckl.pd") == 0)
+      isUnpcklPD = true;
 
-    if (isLoadH || isLoadL || isMovL) {
+    if (isLoadH || isLoadL || isMovL || isMovSD || isShufPD ||
+        isUnpckhPD || isUnpcklPD) {
       std::vector<Constant*> Idxs;
       Value *Op0 = CI->getOperand(1);
-      ShuffleVectorInst *SI;
+      ShuffleVectorInst *SI = NULL;
       if (isLoadH || isLoadL) {
         Value *Op1 = UndefValue::get(Op0->getType());
         Value *Addr = new BitCastInst(CI->getOperand(2),
@@ -216,7 +231,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
         }
         Value *Mask = ConstantVector::get(Idxs);
         SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
-      } else {
+      } else if (isMovL) {
         Constant *Zero = ConstantInt::get(Type::Int32Ty, 0);
         Idxs.push_back(Zero);
         Idxs.push_back(Zero);
@@ -231,8 +246,36 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
         Idxs.push_back(ConstantInt::get(Type::Int32Ty, 3));
         Value *Mask = ConstantVector::get(Idxs);
         SI = new ShuffleVectorInst(ZeroV, Op0, Mask, "upgraded.", CI);
+      } else if (isMovSD || isUnpckhPD || isUnpcklPD) {
+        Value *Op1 = CI->getOperand(2);
+        if (isMovSD) {
+          // movsd: element 0 comes from Op1, element 1 from Op0.
+          Idxs.push_back(ConstantInt::get(Type::Int32Ty, 2));
+          Idxs.push_back(ConstantInt::get(Type::Int32Ty, 1));
+        } else if (isUnpckhPD) {
+          // unpckhpd: element 1 of Op0, then element 1 of Op1.
+          Idxs.push_back(ConstantInt::get(Type::Int32Ty, 1));
+          Idxs.push_back(ConstantInt::get(Type::Int32Ty, 3));
+        } else {
+          // unpcklpd: element 0 of Op0, then element 0 of Op1.
+          Idxs.push_back(ConstantInt::get(Type::Int32Ty, 0));
+          Idxs.push_back(ConstantInt::get(Type::Int32Ty, 2));
+        }
+        Value *Mask = ConstantVector::get(Idxs);
+        SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
+      } else if (isShufPD) {
+        Value *Op1 = CI->getOperand(2);
+        unsigned MaskVal = cast<ConstantInt>(CI->getOperand(3))->getZExtValue();
+        // Immediate bit 0 selects the Op0 element and bit 1 the Op1 element;
+        // mask indices 2 and 3 address Op1, hence the +2.
+        Idxs.push_back(ConstantInt::get(Type::Int32Ty, MaskVal & 1));
+        Idxs.push_back(ConstantInt::get(Type::Int32Ty, ((MaskVal >> 1) & 1)+2));
+        Value *Mask = ConstantVector::get(Idxs);
+        SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
       }
 
+      assert(SI && "Unexpected!");
+
       // Handle any uses of the old CallInst.
       if (!CI->use_empty())
         // Replace all uses of the old call with the new cast which has the
diff --git a/test/Bitcode/sse2_movs_d.ll b/test/Bitcode/sse2_movs_d.ll
new file mode 100644
index 00000000000..25a35b6455c
--- /dev/null
+++ b/test/Bitcode/sse2_movs_d.ll
@@ -0,0 +1,2 @@
+; RUN: llvm-dis < %s.bc | not grep {@llvm\\.x86\\.sse2\\.movs\\.d}
+; RUN: llvm-dis < %s.bc | grep shufflevector
diff --git a/test/Bitcode/sse2_movs_d.ll.bc b/test/Bitcode/sse2_movs_d.ll.bc
new file mode 100644
index 00000000000..719d5294e16
Binary files /dev/null and b/test/Bitcode/sse2_movs_d.ll.bc differ
diff --git a/test/Bitcode/sse2_shuf_pd.ll b/test/Bitcode/sse2_shuf_pd.ll
new file mode 100644
index 00000000000..5829edbc256
--- /dev/null
+++ b/test/Bitcode/sse2_shuf_pd.ll
@@ -0,0 +1,2 @@
+; RUN: llvm-dis < %s.bc | not grep {@llvm\\.x86\\.sse2\\.shuf\\.pd}
+; RUN: llvm-dis < %s.bc | grep shufflevector
diff --git a/test/Bitcode/sse2_shuf_pd.ll.bc b/test/Bitcode/sse2_shuf_pd.ll.bc
new file mode 100644
index 00000000000..832c39e103f
Binary files /dev/null and b/test/Bitcode/sse2_shuf_pd.ll.bc differ
diff --git a/test/Bitcode/sse2_unpck_pd.ll b/test/Bitcode/sse2_unpck_pd.ll
new file mode 100644
index 00000000000..f4e5d540684
--- /dev/null
+++ b/test/Bitcode/sse2_unpck_pd.ll
@@ -0,0 +1,3 @@
+; RUN: llvm-dis < %s.bc | not grep {@llvm\\.x86\\.sse2\\.unpckh\\.pd}
+; RUN: llvm-dis < %s.bc | not grep {@llvm\\.x86\\.sse2\\.unpckl\\.pd}
+; RUN: llvm-dis < %s.bc | grep shufflevector
diff --git a/test/Bitcode/sse2_unpck_pd.ll.bc b/test/Bitcode/sse2_unpck_pd.ll.bc
new file mode 100644
index 00000000000..4fb829cbf71
Binary files /dev/null and b/test/Bitcode/sse2_unpck_pd.ll.bc differ