From 6f9877320358587331fe4fb6bad1baa862519670 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Sat, 7 Oct 2006 21:17:13 +0000 Subject: [PATCH] convert packed FP add/sub/mul/div to use a multiclass. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@30815 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 95 ++++++++++++----------------------- 1 file changed, 32 insertions(+), 63 deletions(-) diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 24f54fab724..84ccfeaedfc 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -288,8 +288,6 @@ def MOVSDmr : SDI<0x11, MRMDestMem, (ops f64mem:$dst, FR64:$src), "movsd {$src, $dst|$dst, $src}", [(store FR64:$src, addr:$dst)]>; -let isTwoAddress = 1 in { - /// scalar_sse12_fp_binop_rm - Scalar SSE binops come in four basic forms: /// 1. f32 vs f64 - These come in SSE1/SSE2 forms for float/doubles. /// 2. rr vs rm - They include a reg+reg form and a ref+mem form. @@ -299,6 +297,7 @@ let isTwoAddress = 1 in { /// leave the top elements undefined. This adds another two variants of the /// above permutations, giving us 8 forms for 'instruction'. /// +let isTwoAddress = 1 in { multiclass scalar_sse12_fp_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, Intrinsic F32Int, Intrinsic F64Int, bit Commutable = 0> { @@ -573,7 +572,6 @@ def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (ops VR128:$src1, f128mem:$src2), // start with 'Fs'. // Alias instructions that map fld0 to pxor for sse. -// FIXME: remove when we can teach regalloc that xor reg, reg is ok. def FsFLD0SS : I<0xEF, MRMInitReg, (ops FR32:$dst), "pxor $dst, $dst", [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1]>, TB, OpSize; @@ -912,70 +910,41 @@ def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem, Requires<[HasSSE2]>; } -// Arithmetic +/// packed_sse12_fp_binop_rm - Packed SSE binops come in four basic forms: +/// 1. v4f32 vs v2f64 - These come in SSE1/SSE2 forms for float/doubles. +/// 2. 
rr vs rm - They include a reg+reg form and a reg+mem form. +/// let isTwoAddress = 1 in { -let isCommutable = 1 in { -def ADDPSrr : PSI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), - "addps {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (v4f32 (fadd VR128:$src1, VR128:$src2)))]>; -def ADDPDrr : PDI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), - "addpd {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (v2f64 (fadd VR128:$src1, VR128:$src2)))]>; -def MULPSrr : PSI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), - "mulps {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (v4f32 (fmul VR128:$src1, VR128:$src2)))]>; -def MULPDrr : PDI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), - "mulpd {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (v2f64 (fmul VR128:$src1, VR128:$src2)))]>; +multiclass packed_sse12_fp_binop_rm<bits<8> opc, string OpcodeStr, + SDNode OpNode, bit Commutable = 0> { + // Packed operation, reg+reg. + def PSrr : PSI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), + !strconcat(OpcodeStr, "ps {$src2, $dst|$dst, $src2}"), + [(set VR128:$dst, (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> { + let isCommutable = Commutable; + } + def PDrr : PDI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), + !strconcat(OpcodeStr, "pd {$src2, $dst|$dst, $src2}"), + [(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> { + let isCommutable = Commutable; + } + // Packed operation, reg+mem. 
+ def PSrm : PSI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2), + !strconcat(OpcodeStr, "ps {$src2, $dst|$dst, $src2}"), + [(set VR128:$dst, (v4f32 (OpNode VR128:$src1, (load addr:$src2))))]>; + def PDrm : PDI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2), + !strconcat(OpcodeStr, "pd {$src2, $dst|$dst, $src2}"), + [(set VR128:$dst, (v2f64 (OpNode VR128:$src1, (load addr:$src2))))]>; +} } -def ADDPSrm : PSI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2), - "addps {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (v4f32 (fadd VR128:$src1, - (load addr:$src2))))]>; -def ADDPDrm : PDI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2), - "addpd {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (v2f64 (fadd VR128:$src1, - (load addr:$src2))))]>; -def MULPSrm : PSI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2), - "mulps {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (v4f32 (fmul VR128:$src1, - (load addr:$src2))))]>; -def MULPDrm : PDI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2), - "mulpd {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (v2f64 (fmul VR128:$src1, - (load addr:$src2))))]>; - -def DIVPSrr : PSI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), - "divps {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (v4f32 (fdiv VR128:$src1, VR128:$src2)))]>; -def DIVPSrm : PSI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2), - "divps {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (v4f32 (fdiv VR128:$src1, - (load addr:$src2))))]>; -def DIVPDrr : PDI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), - "divpd {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (v2f64 (fdiv VR128:$src1, VR128:$src2)))]>; -def DIVPDrm : PDI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2), - "divpd {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (v2f64 (fdiv VR128:$src1, - (load addr:$src2))))]>; - -def SUBPSrr : PSI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), - "subps {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (v4f32 (fsub VR128:$src1, VR128:$src2)))]>; -def SUBPSrm : PSI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2), - "subps {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (v4f32 (fsub VR128:$src1, - (load addr:$src2))))]>; -def SUBPDrr : PDI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, 
VR128:$src2), - "subpd {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (v2f64 (fsub VR128:$src1, VR128:$src2)))]>; -def SUBPDrm : PDI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2), - "subpd {$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (v2f64 (fsub VR128:$src1, - (load addr:$src2))))]>; +defm ADD : packed_sse12_fp_binop_rm<0x58, "add", fadd, 1>; +defm MUL : packed_sse12_fp_binop_rm<0x59, "mul", fmul, 1>; +defm DIV : packed_sse12_fp_binop_rm<0x5E, "div", fdiv>; +defm SUB : packed_sse12_fp_binop_rm<0x5C, "sub", fsub>; +// Arithmetic +let isTwoAddress = 1 in { def ADDSUBPSrr : S3DI<0xD0, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2), "addsubps {$src2, $dst|$dst, $src2}",