Remove the pmulld intrinsic and autoupdate it as a vector multiply.

Rewrite the pmulld patterns, and make sure that they fold in loads of
arguments into the instruction.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@99910 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Eric Christopher 2010-03-30 18:49:01 +00:00
parent 9b97a73ded
commit 8258d0b4bf
8 changed files with 58 additions and 7 deletions

View File

@ -810,9 +810,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse41_pmuldq : GCCBuiltin<"__builtin_ia32_pmuldq128">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem, Commutative]>;
def int_x86_sse41_pmulld : GCCBuiltin<"__builtin_ia32_pmulld128">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem, Commutative]>;
}
// Vector extract

View File

@ -597,7 +597,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::PMULHUWrr, X86::PMULHUWrm, 16 },
{ X86::PMULHWrr, X86::PMULHWrm, 16 },
{ X86::PMULLDrr, X86::PMULLDrm, 16 },
{ X86::PMULLDrr_int, X86::PMULLDrm_int, 16 },
{ X86::PMULLWrr, X86::PMULLWrm, 16 },
{ X86::PMULUDQrr, X86::PMULUDQrm, 16 },
{ X86::PORrr, X86::PORrm, 16 },

View File

@ -3448,8 +3448,28 @@ let Constraints = "$src1 = $dst" in {
OpSize;
}
}
defm PMULLD : SS41I_binop_patint<0x40, "pmulld", v4i32, mul,
int_x86_sse41_pmulld, 1>;
/// SS48I_binop_rm - Simple SSE41 binary operator.
let Constraints = "$src1 = $dst" in {
multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT, bit Commutable = 0> {
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>,
OpSize {
let isCommutable = Commutable;
}
def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (OpNode VR128:$src1,
(bc_v4i32 (memopv2i64 addr:$src2))))]>,
OpSize;
}
}
defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, 1>;
/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
let Constraints = "$src1 = $dst" in {

View File

@ -225,7 +225,12 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
// Calls to these intrinsics are transformed into ShuffleVector's.
NewFn = 0;
return true;
} else if (Name.compare(5, 16, "x86.sse41.pmulld", 16) == 0) {
// Calls to these intrinsics are transformed into vector multiplies.
NewFn = 0;
return true;
}
break;
}
@ -355,6 +360,18 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
// Clean up the old call now that it has been completely upgraded.
CI->eraseFromParent();
} else if (F->getName() == "llvm.x86.sse41.pmulld") {
// Upgrade this set of intrinsics into vector multiplies.
Instruction *Mul = BinaryOperator::CreateMul(CI->getOperand(1),
CI->getOperand(2),
CI->getName(),
CI);
// Fix up all the uses with our new multiply.
if (!CI->use_empty())
CI->replaceAllUsesWith(Mul);
// Remove upgraded multiply.
CI->eraseFromParent();
} else {
llvm_unreachable("Unknown function for CallInst upgrade.");
}

View File

@ -0,0 +1,2 @@
; RUN: llvm-dis < %s.bc | not grep {i32 @llvm\\.pmulld}
; RUN: llvm-dis < %s.bc | grep mul

Binary file not shown.

View File

@ -1,6 +1,6 @@
; RUN: llc < %s -march=x86 -mattr=sse41 -stack-alignment=16 > %t
; RUN: grep pmul %t | count 12
; RUN: grep mov %t | count 12
; RUN: grep mov %t | count 11
define <4 x i32> @a(<4 x i32> %i) nounwind {
%A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 >

View File

@ -0,0 +1,16 @@
; RUN: llc < %s -march=x86-64 -mattr=+sse41 -asm-verbose=0 | FileCheck %s
define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
; CHECK: test1:
; CHECK-NEXT: pmulld
%C = mul <4 x i32> %A, %B
ret <4 x i32> %C
}
define <4 x i32> @test1a(<4 x i32> %A, <4 x i32> *%Bp) nounwind {
; CHECK: test1a:
; CHECK-NEXT: pmulld
%B = load <4 x i32>* %Bp
%C = mul <4 x i32> %A, %B
ret <4 x i32> %C
}