mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-14 17:34:41 +00:00
Use PMULDQ for v2i64 multiplies when SSE4.1 is available, and add
load-folding table entries for PMULDQ and PMULLD.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@51489 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
d4083d01d2
commit
0b924dcef8
@ -700,6 +700,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||
if (Subtarget->hasSSE41()) {
|
||||
// FIXME: Do we need to handle scalar-to-vector here?
|
||||
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
|
||||
setOperationAction(ISD::MUL, MVT::v2i64, Legal);
|
||||
|
||||
// i8 and i16 vectors are custom, because the source register and
|
||||
// source memory operand types are not the same width. f32 vectors are
|
||||
|
@ -569,8 +569,12 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
|
||||
{ X86::PMAXUBrr, X86::PMAXUBrm },
|
||||
{ X86::PMINSWrr, X86::PMINSWrm },
|
||||
{ X86::PMINUBrr, X86::PMINUBrm },
|
||||
{ X86::PMULDQrr, X86::PMULDQrm },
|
||||
{ X86::PMULDQrr_int, X86::PMULDQrm_int },
|
||||
{ X86::PMULHUWrr, X86::PMULHUWrm },
|
||||
{ X86::PMULHWrr, X86::PMULHWrm },
|
||||
{ X86::PMULLDrr, X86::PMULLDrm },
|
||||
{ X86::PMULLDrr_int, X86::PMULLDrm_int },
|
||||
{ X86::PMULLWrr, X86::PMULLWrm },
|
||||
{ X86::PMULUDQrr, X86::PMULUDQrm },
|
||||
{ X86::PORrr, X86::PORrm },
|
||||
|
@ -3242,19 +3242,18 @@ defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud",
|
||||
int_x86_sse41_pmaxud, 1>;
|
||||
defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw",
|
||||
int_x86_sse41_pmaxuw, 1>;
|
||||
defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq",
|
||||
int_x86_sse41_pmuldq, 1>;
|
||||
|
||||
|
||||
/// SS41I_binop_patint - Simple SSE 4.1 binary operator
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
multiclass SS41I_binop_patint<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
Intrinsic IntId128, bit Commutable = 0> {
|
||||
multiclass SS41I_binop_patint<bits<8> opc, string OpcodeStr, ValueType OpVT,
|
||||
SDNode OpNode, Intrinsic IntId128,
|
||||
bit Commutable = 0> {
|
||||
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (OpNode (v4i32 VR128:$src1),
|
||||
VR128:$src2))]>, OpSize {
|
||||
[(set VR128:$dst, (OpNode (OpVT VR128:$src1),
|
||||
VR128:$src2))]>, OpSize {
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
def rr_int : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
@ -3277,8 +3276,10 @@ let Constraints = "$src1 = $dst" in {
|
||||
OpSize;
|
||||
}
|
||||
}
|
||||
defm PMULLD : SS41I_binop_patint<0x40, "pmulld", mul,
|
||||
defm PMULLD : SS41I_binop_patint<0x40, "pmulld", v4i32, mul,
|
||||
int_x86_sse41_pmulld, 1>;
|
||||
defm PMULDQ : SS41I_binop_patint<0x28, "pmuldq", v2i64, mul,
|
||||
int_x86_sse41_pmuldq, 1>;
|
||||
|
||||
|
||||
/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
|
||||
|
32
test/CodeGen/X86/pmul.ll
Normal file
32
test/CodeGen/X86/pmul.ll
Normal file
@ -0,0 +1,32 @@
|
||||
; RUN: llvm-as < %s | llc -march=x86 -mattr=sse41 > %t
|
||||
; RUN: grep pmul %t | count 6
|
||||
; RUN: grep mov %t | count 8
|
||||
|
||||
define <4 x i32> @a(<4 x i32> %i) nounwind {
|
||||
%A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 >
|
||||
ret <4 x i32> %A
|
||||
}
|
||||
define <2 x i64> @b(<2 x i64> %i) nounwind {
|
||||
%A = mul <2 x i64> %i, < i64 117, i64 117 >
|
||||
ret <2 x i64> %A
|
||||
}
|
||||
define <4 x i32> @c(<4 x i32> %i, <4 x i32> %j) nounwind {
|
||||
%A = mul <4 x i32> %i, %j
|
||||
ret <4 x i32> %A
|
||||
}
|
||||
define <2 x i64> @d(<2 x i64> %i, <2 x i64> %j) nounwind {
|
||||
%A = mul <2 x i64> %i, %j
|
||||
ret <2 x i64> %A
|
||||
}
|
||||
; Use a call to force spills.
|
||||
declare void @foo()
|
||||
define <4 x i32> @e(<4 x i32> %i, <4 x i32> %j) nounwind {
|
||||
call void @foo()
|
||||
%A = mul <4 x i32> %i, %j
|
||||
ret <4 x i32> %A
|
||||
}
|
||||
define <2 x i64> @f(<2 x i64> %i, <2 x i64> %j) nounwind {
|
||||
call void @foo()
|
||||
%A = mul <2 x i64> %i, %j
|
||||
ret <2 x i64> %A
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user