mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-05 12:31:33 +00:00
Whitespace and 80-col cleanup.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@77718 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
84a832f927
commit
44b93ff9ad
@ -1,10 +1,10 @@
|
|||||||
//====- X86InstrSSE.td - Describe the X86 Instruction Set --*- tablegen -*-===//
|
//====- X86InstrSSE.td - Describe the X86 Instruction Set --*- tablegen -*-===//
|
||||||
//
|
//
|
||||||
// The LLVM Compiler Infrastructure
|
// The LLVM Compiler Infrastructure
|
||||||
//
|
//
|
||||||
// This file is distributed under the University of Illinois Open Source
|
// This file is distributed under the University of Illinois Open Source
|
||||||
// License. See LICENSE.TXT for details.
|
// License. See LICENSE.TXT for details.
|
||||||
//
|
//
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
//
|
//
|
||||||
// This file describes the X86 SSE instruction set, defining the instructions,
|
// This file describes the X86 SSE instruction set, defining the instructions,
|
||||||
@ -36,20 +36,20 @@ def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
|
|||||||
def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;
|
def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;
|
||||||
def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
|
def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
|
||||||
def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
|
def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
|
||||||
def X86pshufb : SDNode<"X86ISD::PSHUFB",
|
def X86pshufb : SDNode<"X86ISD::PSHUFB",
|
||||||
SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
|
SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
|
||||||
SDTCisSameAs<0,2>]>>;
|
SDTCisSameAs<0,2>]>>;
|
||||||
def X86pextrb : SDNode<"X86ISD::PEXTRB",
|
def X86pextrb : SDNode<"X86ISD::PEXTRB",
|
||||||
SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
|
SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
|
||||||
def X86pextrw : SDNode<"X86ISD::PEXTRW",
|
def X86pextrw : SDNode<"X86ISD::PEXTRW",
|
||||||
SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
|
SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
|
||||||
def X86pinsrb : SDNode<"X86ISD::PINSRB",
|
def X86pinsrb : SDNode<"X86ISD::PINSRB",
|
||||||
SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
|
SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
|
||||||
SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
|
SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
|
||||||
def X86pinsrw : SDNode<"X86ISD::PINSRW",
|
def X86pinsrw : SDNode<"X86ISD::PINSRW",
|
||||||
SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
|
SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
|
||||||
SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
|
SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
|
||||||
def X86insrtps : SDNode<"X86ISD::INSERTPS",
|
def X86insrtps : SDNode<"X86ISD::INSERTPS",
|
||||||
SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>,
|
SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>,
|
||||||
SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>;
|
SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>;
|
||||||
def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
|
def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
|
||||||
@ -182,13 +182,13 @@ def SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{
|
|||||||
return getI8Imm(X86::getShuffleSHUFImmediate(N));
|
return getI8Imm(X86::getShuffleSHUFImmediate(N));
|
||||||
}]>;
|
}]>;
|
||||||
|
|
||||||
// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to
|
// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to
|
||||||
// PSHUFHW imm.
|
// PSHUFHW imm.
|
||||||
def SHUFFLE_get_pshufhw_imm : SDNodeXForm<vector_shuffle, [{
|
def SHUFFLE_get_pshufhw_imm : SDNodeXForm<vector_shuffle, [{
|
||||||
return getI8Imm(X86::getShufflePSHUFHWImmediate(N));
|
return getI8Imm(X86::getShufflePSHUFHWImmediate(N));
|
||||||
}]>;
|
}]>;
|
||||||
|
|
||||||
// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to
|
// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to
|
||||||
// PSHUFLW imm.
|
// PSHUFLW imm.
|
||||||
def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{
|
def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{
|
||||||
return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
|
return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
|
||||||
@ -363,25 +363,25 @@ def Int_CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
|
|||||||
[(set VR64:$dst, (int_x86_sse_cvtps2pi VR128:$src))]>;
|
[(set VR64:$dst, (int_x86_sse_cvtps2pi VR128:$src))]>;
|
||||||
def Int_CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
|
def Int_CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
|
||||||
"cvtps2pi\t{$src, $dst|$dst, $src}",
|
"cvtps2pi\t{$src, $dst|$dst, $src}",
|
||||||
[(set VR64:$dst, (int_x86_sse_cvtps2pi
|
[(set VR64:$dst, (int_x86_sse_cvtps2pi
|
||||||
(load addr:$src)))]>;
|
(load addr:$src)))]>;
|
||||||
def Int_CVTTPS2PIrr: PSI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
|
def Int_CVTTPS2PIrr: PSI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
|
||||||
"cvttps2pi\t{$src, $dst|$dst, $src}",
|
"cvttps2pi\t{$src, $dst|$dst, $src}",
|
||||||
[(set VR64:$dst, (int_x86_sse_cvttps2pi VR128:$src))]>;
|
[(set VR64:$dst, (int_x86_sse_cvttps2pi VR128:$src))]>;
|
||||||
def Int_CVTTPS2PIrm: PSI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
|
def Int_CVTTPS2PIrm: PSI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
|
||||||
"cvttps2pi\t{$src, $dst|$dst, $src}",
|
"cvttps2pi\t{$src, $dst|$dst, $src}",
|
||||||
[(set VR64:$dst, (int_x86_sse_cvttps2pi
|
[(set VR64:$dst, (int_x86_sse_cvttps2pi
|
||||||
(load addr:$src)))]>;
|
(load addr:$src)))]>;
|
||||||
let Constraints = "$src1 = $dst" in {
|
let Constraints = "$src1 = $dst" in {
|
||||||
def Int_CVTPI2PSrr : PSI<0x2A, MRMSrcReg,
|
def Int_CVTPI2PSrr : PSI<0x2A, MRMSrcReg,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, VR64:$src2),
|
(outs VR128:$dst), (ins VR128:$src1, VR64:$src2),
|
||||||
"cvtpi2ps\t{$src2, $dst|$dst, $src2}",
|
"cvtpi2ps\t{$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1,
|
[(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1,
|
||||||
VR64:$src2))]>;
|
VR64:$src2))]>;
|
||||||
def Int_CVTPI2PSrm : PSI<0x2A, MRMSrcMem,
|
def Int_CVTPI2PSrm : PSI<0x2A, MRMSrcMem,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, i64mem:$src2),
|
(outs VR128:$dst), (ins VR128:$src1, i64mem:$src2),
|
||||||
"cvtpi2ps\t{$src2, $dst|$dst, $src2}",
|
"cvtpi2ps\t{$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1,
|
[(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1,
|
||||||
(load addr:$src2)))]>;
|
(load addr:$src2)))]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -410,11 +410,11 @@ let Constraints = "$src1 = $dst" in {
|
|||||||
|
|
||||||
// Comparison instructions
|
// Comparison instructions
|
||||||
let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
|
let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
|
||||||
def CMPSSrr : SSIi8<0xC2, MRMSrcReg,
|
def CMPSSrr : SSIi8<0xC2, MRMSrcReg,
|
||||||
(outs FR32:$dst), (ins FR32:$src1, FR32:$src, SSECC:$cc),
|
(outs FR32:$dst), (ins FR32:$src1, FR32:$src, SSECC:$cc),
|
||||||
"cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
|
"cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
|
||||||
let mayLoad = 1 in
|
let mayLoad = 1 in
|
||||||
def CMPSSrm : SSIi8<0xC2, MRMSrcMem,
|
def CMPSSrm : SSIi8<0xC2, MRMSrcMem,
|
||||||
(outs FR32:$dst), (ins FR32:$src1, f32mem:$src, SSECC:$cc),
|
(outs FR32:$dst), (ins FR32:$src1, f32mem:$src, SSECC:$cc),
|
||||||
"cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
|
"cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
|
||||||
}
|
}
|
||||||
@ -431,13 +431,15 @@ def UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs), (ins FR32:$src1, f32mem:$src2),
|
|||||||
|
|
||||||
// Aliases to match intrinsics which expect XMM operand(s).
|
// Aliases to match intrinsics which expect XMM operand(s).
|
||||||
let Constraints = "$src1 = $dst" in {
|
let Constraints = "$src1 = $dst" in {
|
||||||
def Int_CMPSSrr : SSIi8<0xC2, MRMSrcReg,
|
def Int_CMPSSrr : SSIi8<0xC2, MRMSrcReg,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
|
(outs VR128:$dst), (ins VR128:$src1, VR128:$src,
|
||||||
|
SSECC:$cc),
|
||||||
"cmp${cc}ss\t{$src, $dst|$dst, $src}",
|
"cmp${cc}ss\t{$src, $dst|$dst, $src}",
|
||||||
[(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1,
|
[(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1,
|
||||||
VR128:$src, imm:$cc))]>;
|
VR128:$src, imm:$cc))]>;
|
||||||
def Int_CMPSSrm : SSIi8<0xC2, MRMSrcMem,
|
def Int_CMPSSrm : SSIi8<0xC2, MRMSrcMem,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, f32mem:$src, SSECC:$cc),
|
(outs VR128:$dst), (ins VR128:$src1, f32mem:$src,
|
||||||
|
SSECC:$cc),
|
||||||
"cmp${cc}ss\t{$src, $dst|$dst, $src}",
|
"cmp${cc}ss\t{$src, $dst|$dst, $src}",
|
||||||
[(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1,
|
[(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1,
|
||||||
(load addr:$src), imm:$cc))]>;
|
(load addr:$src), imm:$cc))]>;
|
||||||
@ -463,8 +465,8 @@ def Int_COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
|
|||||||
(implicit EFLAGS)]>;
|
(implicit EFLAGS)]>;
|
||||||
} // Defs = [EFLAGS]
|
} // Defs = [EFLAGS]
|
||||||
|
|
||||||
// Aliases of packed SSE1 instructions for scalar use. These all have names that
|
// Aliases of packed SSE1 instructions for scalar use. These all have names
|
||||||
// start with 'Fs'.
|
// that start with 'Fs'.
|
||||||
|
|
||||||
// Alias instructions that map fld0 to pxor for sse.
|
// Alias instructions that map fld0 to pxor for sse.
|
||||||
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
|
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
|
||||||
@ -474,7 +476,7 @@ def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins),
|
|||||||
|
|
||||||
// Alias instruction to do FR32 reg-to-reg copy using movaps. Upper bits are
|
// Alias instruction to do FR32 reg-to-reg copy using movaps. Upper bits are
|
||||||
// disregarded.
|
// disregarded.
|
||||||
let neverHasSideEffects = 1 in
|
let neverHasSideEffects = 1 in
|
||||||
def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
|
def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
|
||||||
"movaps\t{$src, $dst|$dst, $src}", []>;
|
"movaps\t{$src, $dst|$dst, $src}", []>;
|
||||||
|
|
||||||
@ -555,7 +557,7 @@ multiclass basic_sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
|
|||||||
(ins FR32:$src1, f32mem:$src2),
|
(ins FR32:$src1, f32mem:$src2),
|
||||||
!strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
|
!strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
|
||||||
[(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>;
|
[(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>;
|
||||||
|
|
||||||
// Vector operation, reg+reg.
|
// Vector operation, reg+reg.
|
||||||
def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst),
|
def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst),
|
||||||
(ins VR128:$src1, VR128:$src2),
|
(ins VR128:$src1, VR128:$src2),
|
||||||
@ -619,7 +621,7 @@ multiclass sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
|
|||||||
(ins FR32:$src1, f32mem:$src2),
|
(ins FR32:$src1, f32mem:$src2),
|
||||||
!strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
|
!strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
|
||||||
[(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>;
|
[(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>;
|
||||||
|
|
||||||
// Vector operation, reg+reg.
|
// Vector operation, reg+reg.
|
||||||
def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst),
|
def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst),
|
||||||
(ins VR128:$src1, VR128:$src2),
|
(ins VR128:$src1, VR128:$src2),
|
||||||
@ -674,7 +676,7 @@ defm MIN : sse1_fp_binop_rm<0x5D, "min", X86fmin,
|
|||||||
// SSE packed FP Instructions
|
// SSE packed FP Instructions
|
||||||
|
|
||||||
// Move Instructions
|
// Move Instructions
|
||||||
let neverHasSideEffects = 1 in
|
let neverHasSideEffects = 1 in
|
||||||
def MOVAPSrr : PSI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
def MOVAPSrr : PSI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||||
"movaps\t{$src, $dst|$dst, $src}", []>;
|
"movaps\t{$src, $dst|$dst, $src}", []>;
|
||||||
let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
|
let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
|
||||||
@ -711,13 +713,13 @@ let Constraints = "$src1 = $dst" in {
|
|||||||
def MOVLPSrm : PSI<0x12, MRMSrcMem,
|
def MOVLPSrm : PSI<0x12, MRMSrcMem,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
|
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
|
||||||
"movlps\t{$src2, $dst|$dst, $src2}",
|
"movlps\t{$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(movlp VR128:$src1,
|
(movlp VR128:$src1,
|
||||||
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>;
|
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>;
|
||||||
def MOVHPSrm : PSI<0x16, MRMSrcMem,
|
def MOVHPSrm : PSI<0x16, MRMSrcMem,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
|
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
|
||||||
"movhps\t{$src2, $dst|$dst, $src2}",
|
"movhps\t{$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(movhp VR128:$src1,
|
(movhp VR128:$src1,
|
||||||
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>;
|
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>;
|
||||||
} // AddedComplexity
|
} // AddedComplexity
|
||||||
@ -792,7 +794,7 @@ multiclass sse1_fp_unop_rm<bits<8> opc, string OpcodeStr,
|
|||||||
def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
|
def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
|
||||||
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
|
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
|
||||||
[(set FR32:$dst, (OpNode (load addr:$src)))]>;
|
[(set FR32:$dst, (OpNode (load addr:$src)))]>;
|
||||||
|
|
||||||
// Vector operation, reg.
|
// Vector operation, reg.
|
||||||
def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||||
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
|
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
|
||||||
@ -893,12 +895,12 @@ let Constraints = "$src1 = $dst" in {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let Constraints = "$src1 = $dst" in {
|
let Constraints = "$src1 = $dst" in {
|
||||||
def CMPPSrri : PSIi8<0xC2, MRMSrcReg,
|
def CMPPSrri : PSIi8<0xC2, MRMSrcReg,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
|
(outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
|
||||||
"cmp${cc}ps\t{$src, $dst|$dst, $src}",
|
"cmp${cc}ps\t{$src, $dst|$dst, $src}",
|
||||||
[(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
|
[(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
|
||||||
VR128:$src, imm:$cc))]>;
|
VR128:$src, imm:$cc))]>;
|
||||||
def CMPPSrmi : PSIi8<0xC2, MRMSrcMem,
|
def CMPPSrmi : PSIi8<0xC2, MRMSrcMem,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc),
|
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc),
|
||||||
"cmp${cc}ps\t{$src, $dst|$dst, $src}",
|
"cmp${cc}ps\t{$src, $dst|$dst, $src}",
|
||||||
[(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
|
[(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
|
||||||
@ -912,13 +914,13 @@ def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
|
|||||||
// Shuffle and unpack instructions
|
// Shuffle and unpack instructions
|
||||||
let Constraints = "$src1 = $dst" in {
|
let Constraints = "$src1 = $dst" in {
|
||||||
let isConvertibleToThreeAddress = 1 in // Convert to pshufd
|
let isConvertibleToThreeAddress = 1 in // Convert to pshufd
|
||||||
def SHUFPSrri : PSIi8<0xC6, MRMSrcReg,
|
def SHUFPSrri : PSIi8<0xC6, MRMSrcReg,
|
||||||
(outs VR128:$dst), (ins VR128:$src1,
|
(outs VR128:$dst), (ins VR128:$src1,
|
||||||
VR128:$src2, i8imm:$src3),
|
VR128:$src2, i8imm:$src3),
|
||||||
"shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
|
"shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(v4f32 (shufp:$src3 VR128:$src1, VR128:$src2)))]>;
|
(v4f32 (shufp:$src3 VR128:$src1, VR128:$src2)))]>;
|
||||||
def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem,
|
def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem,
|
||||||
(outs VR128:$dst), (ins VR128:$src1,
|
(outs VR128:$dst), (ins VR128:$src1,
|
||||||
f128mem:$src2, i8imm:$src3),
|
f128mem:$src2, i8imm:$src3),
|
||||||
"shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
|
"shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
|
||||||
@ -927,24 +929,24 @@ let Constraints = "$src1 = $dst" in {
|
|||||||
VR128:$src1, (memopv4f32 addr:$src2))))]>;
|
VR128:$src1, (memopv4f32 addr:$src2))))]>;
|
||||||
|
|
||||||
let AddedComplexity = 10 in {
|
let AddedComplexity = 10 in {
|
||||||
def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
|
def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||||
"unpckhps\t{$src2, $dst|$dst, $src2}",
|
"unpckhps\t{$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(v4f32 (unpckh VR128:$src1, VR128:$src2)))]>;
|
(v4f32 (unpckh VR128:$src1, VR128:$src2)))]>;
|
||||||
def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
|
def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
|
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
|
||||||
"unpckhps\t{$src2, $dst|$dst, $src2}",
|
"unpckhps\t{$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(v4f32 (unpckh VR128:$src1,
|
(v4f32 (unpckh VR128:$src1,
|
||||||
(memopv4f32 addr:$src2))))]>;
|
(memopv4f32 addr:$src2))))]>;
|
||||||
|
|
||||||
def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
|
def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||||
"unpcklps\t{$src2, $dst|$dst, $src2}",
|
"unpcklps\t{$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(v4f32 (unpckl VR128:$src1, VR128:$src2)))]>;
|
(v4f32 (unpckl VR128:$src1, VR128:$src2)))]>;
|
||||||
def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
|
def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
|
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
|
||||||
"unpcklps\t{$src2, $dst|$dst, $src2}",
|
"unpcklps\t{$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
@ -1049,14 +1051,14 @@ let AddedComplexity = 20 in
|
|||||||
def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src),
|
def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src),
|
||||||
"movss\t{$src, $dst|$dst, $src}",
|
"movss\t{$src, $dst|$dst, $src}",
|
||||||
[(set VR128:$dst, (v4f32 (X86vzmovl (v4f32 (scalar_to_vector
|
[(set VR128:$dst, (v4f32 (X86vzmovl (v4f32 (scalar_to_vector
|
||||||
(loadf32 addr:$src))))))]>;
|
(loadf32 addr:$src))))))]>;
|
||||||
|
|
||||||
def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
|
def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
|
||||||
(MOVZSS2PSrm addr:$src)>;
|
(MOVZSS2PSrm addr:$src)>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===---------------------------------------------------------------------===//
|
||||||
// SSE2 Instructions
|
// SSE2 Instructions
|
||||||
//===----------------------------------------------------------------------===//
|
//===---------------------------------------------------------------------===//
|
||||||
|
|
||||||
// Move Instructions
|
// Move Instructions
|
||||||
let neverHasSideEffects = 1 in
|
let neverHasSideEffects = 1 in
|
||||||
@ -1080,7 +1082,7 @@ def CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f64mem:$src),
|
|||||||
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
|
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
|
||||||
"cvtsd2ss\t{$src, $dst|$dst, $src}",
|
"cvtsd2ss\t{$src, $dst|$dst, $src}",
|
||||||
[(set FR32:$dst, (fround FR64:$src))]>;
|
[(set FR32:$dst, (fround FR64:$src))]>;
|
||||||
def CVTSD2SSrm : SDI<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
|
def CVTSD2SSrm : SDI<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
|
||||||
"cvtsd2ss\t{$src, $dst|$dst, $src}",
|
"cvtsd2ss\t{$src, $dst|$dst, $src}",
|
||||||
[(set FR32:$dst, (fround (loadf64 addr:$src)))]>;
|
[(set FR32:$dst, (fround (loadf64 addr:$src)))]>;
|
||||||
def CVTSI2SDrr : SDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR32:$src),
|
def CVTSI2SDrr : SDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR32:$src),
|
||||||
@ -1115,21 +1117,21 @@ def Int_CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
|
|||||||
[(set VR64:$dst, (int_x86_sse_cvtpd2pi VR128:$src))]>;
|
[(set VR64:$dst, (int_x86_sse_cvtpd2pi VR128:$src))]>;
|
||||||
def Int_CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src),
|
def Int_CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src),
|
||||||
"cvtpd2pi\t{$src, $dst|$dst, $src}",
|
"cvtpd2pi\t{$src, $dst|$dst, $src}",
|
||||||
[(set VR64:$dst, (int_x86_sse_cvtpd2pi
|
[(set VR64:$dst, (int_x86_sse_cvtpd2pi
|
||||||
(memop addr:$src)))]>;
|
(memop addr:$src)))]>;
|
||||||
def Int_CVTTPD2PIrr: PDI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
|
def Int_CVTTPD2PIrr: PDI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
|
||||||
"cvttpd2pi\t{$src, $dst|$dst, $src}",
|
"cvttpd2pi\t{$src, $dst|$dst, $src}",
|
||||||
[(set VR64:$dst, (int_x86_sse_cvttpd2pi VR128:$src))]>;
|
[(set VR64:$dst, (int_x86_sse_cvttpd2pi VR128:$src))]>;
|
||||||
def Int_CVTTPD2PIrm: PDI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src),
|
def Int_CVTTPD2PIrm: PDI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src),
|
||||||
"cvttpd2pi\t{$src, $dst|$dst, $src}",
|
"cvttpd2pi\t{$src, $dst|$dst, $src}",
|
||||||
[(set VR64:$dst, (int_x86_sse_cvttpd2pi
|
[(set VR64:$dst, (int_x86_sse_cvttpd2pi
|
||||||
(memop addr:$src)))]>;
|
(memop addr:$src)))]>;
|
||||||
def Int_CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
|
def Int_CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
|
||||||
"cvtpi2pd\t{$src, $dst|$dst, $src}",
|
"cvtpi2pd\t{$src, $dst|$dst, $src}",
|
||||||
[(set VR128:$dst, (int_x86_sse_cvtpi2pd VR64:$src))]>;
|
[(set VR128:$dst, (int_x86_sse_cvtpi2pd VR64:$src))]>;
|
||||||
def Int_CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
|
def Int_CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
|
||||||
"cvtpi2pd\t{$src, $dst|$dst, $src}",
|
"cvtpi2pd\t{$src, $dst|$dst, $src}",
|
||||||
[(set VR128:$dst, (int_x86_sse_cvtpi2pd
|
[(set VR128:$dst, (int_x86_sse_cvtpi2pd
|
||||||
(load addr:$src)))]>;
|
(load addr:$src)))]>;
|
||||||
|
|
||||||
// Aliases for intrinsics
|
// Aliases for intrinsics
|
||||||
@ -1144,11 +1146,11 @@ def Int_CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src),
|
|||||||
|
|
||||||
// Comparison instructions
|
// Comparison instructions
|
||||||
let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
|
let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
|
||||||
def CMPSDrr : SDIi8<0xC2, MRMSrcReg,
|
def CMPSDrr : SDIi8<0xC2, MRMSrcReg,
|
||||||
(outs FR64:$dst), (ins FR64:$src1, FR64:$src, SSECC:$cc),
|
(outs FR64:$dst), (ins FR64:$src1, FR64:$src, SSECC:$cc),
|
||||||
"cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
|
"cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
|
||||||
let mayLoad = 1 in
|
let mayLoad = 1 in
|
||||||
def CMPSDrm : SDIi8<0xC2, MRMSrcMem,
|
def CMPSDrm : SDIi8<0xC2, MRMSrcMem,
|
||||||
(outs FR64:$dst), (ins FR64:$src1, f64mem:$src, SSECC:$cc),
|
(outs FR64:$dst), (ins FR64:$src1, f64mem:$src, SSECC:$cc),
|
||||||
"cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
|
"cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
|
||||||
}
|
}
|
||||||
@ -1165,13 +1167,15 @@ def UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs), (ins FR64:$src1, f64mem:$src2),
|
|||||||
|
|
||||||
// Aliases to match intrinsics which expect XMM operand(s).
|
// Aliases to match intrinsics which expect XMM operand(s).
|
||||||
let Constraints = "$src1 = $dst" in {
|
let Constraints = "$src1 = $dst" in {
|
||||||
def Int_CMPSDrr : SDIi8<0xC2, MRMSrcReg,
|
def Int_CMPSDrr : SDIi8<0xC2, MRMSrcReg,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
|
(outs VR128:$dst), (ins VR128:$src1, VR128:$src,
|
||||||
|
SSECC:$cc),
|
||||||
"cmp${cc}sd\t{$src, $dst|$dst, $src}",
|
"cmp${cc}sd\t{$src, $dst|$dst, $src}",
|
||||||
[(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
|
[(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
|
||||||
VR128:$src, imm:$cc))]>;
|
VR128:$src, imm:$cc))]>;
|
||||||
def Int_CMPSDrm : SDIi8<0xC2, MRMSrcMem,
|
def Int_CMPSDrm : SDIi8<0xC2, MRMSrcMem,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src, SSECC:$cc),
|
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src,
|
||||||
|
SSECC:$cc),
|
||||||
"cmp${cc}sd\t{$src, $dst|$dst, $src}",
|
"cmp${cc}sd\t{$src, $dst|$dst, $src}",
|
||||||
[(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
|
[(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
|
||||||
(load addr:$src), imm:$cc))]>;
|
(load addr:$src), imm:$cc))]>;
|
||||||
@ -1197,8 +1201,8 @@ def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
|
|||||||
(implicit EFLAGS)]>;
|
(implicit EFLAGS)]>;
|
||||||
} // Defs = [EFLAGS]
|
} // Defs = [EFLAGS]
|
||||||
|
|
||||||
// Aliases of packed SSE2 instructions for scalar use. These all have names that
|
// Aliases of packed SSE2 instructions for scalar use. These all have names
|
||||||
// start with 'Fs'.
|
// that start with 'Fs'.
|
||||||
|
|
||||||
// Alias instructions that map fld0 to pxor for sse.
|
// Alias instructions that map fld0 to pxor for sse.
|
||||||
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
|
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
|
||||||
@ -1289,7 +1293,7 @@ multiclass basic_sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
|
|||||||
(ins FR64:$src1, f64mem:$src2),
|
(ins FR64:$src1, f64mem:$src2),
|
||||||
!strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
|
!strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
|
||||||
[(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;
|
[(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;
|
||||||
|
|
||||||
// Vector operation, reg+reg.
|
// Vector operation, reg+reg.
|
||||||
def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
|
def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
|
||||||
(ins VR128:$src1, VR128:$src2),
|
(ins VR128:$src1, VR128:$src2),
|
||||||
@ -1353,7 +1357,7 @@ multiclass sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
|
|||||||
(ins FR64:$src1, f64mem:$src2),
|
(ins FR64:$src1, f64mem:$src2),
|
||||||
!strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
|
!strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
|
||||||
[(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;
|
[(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;
|
||||||
|
|
||||||
// Vector operation, reg+reg.
|
// Vector operation, reg+reg.
|
||||||
def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
|
def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
|
||||||
(ins VR128:$src1, VR128:$src2),
|
(ins VR128:$src1, VR128:$src2),
|
||||||
@ -1405,7 +1409,7 @@ defm MAX : sse2_fp_binop_rm<0x5F, "max", X86fmax,
|
|||||||
defm MIN : sse2_fp_binop_rm<0x5D, "min", X86fmin,
|
defm MIN : sse2_fp_binop_rm<0x5D, "min", X86fmin,
|
||||||
int_x86_sse2_min_sd, int_x86_sse2_min_pd>;
|
int_x86_sse2_min_sd, int_x86_sse2_min_pd>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===---------------------------------------------------------------------===//
|
||||||
// SSE packed FP Instructions
|
// SSE packed FP Instructions
|
||||||
|
|
||||||
// Move Instructions
|
// Move Instructions
|
||||||
@ -1445,13 +1449,13 @@ let Constraints = "$src1 = $dst" in {
|
|||||||
def MOVLPDrm : PDI<0x12, MRMSrcMem,
|
def MOVLPDrm : PDI<0x12, MRMSrcMem,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
|
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
|
||||||
"movlpd\t{$src2, $dst|$dst, $src2}",
|
"movlpd\t{$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(v2f64 (movlp VR128:$src1,
|
(v2f64 (movlp VR128:$src1,
|
||||||
(scalar_to_vector (loadf64 addr:$src2)))))]>;
|
(scalar_to_vector (loadf64 addr:$src2)))))]>;
|
||||||
def MOVHPDrm : PDI<0x16, MRMSrcMem,
|
def MOVHPDrm : PDI<0x16, MRMSrcMem,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
|
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
|
||||||
"movhpd\t{$src2, $dst|$dst, $src2}",
|
"movhpd\t{$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(v2f64 (movhp VR128:$src1,
|
(v2f64 (movhp VR128:$src1,
|
||||||
(scalar_to_vector (loadf64 addr:$src2)))))]>;
|
(scalar_to_vector (loadf64 addr:$src2)))))]>;
|
||||||
} // AddedComplexity
|
} // AddedComplexity
|
||||||
@ -1567,7 +1571,7 @@ def Int_CVTSD2SSrr: SDI<0x5A, MRMSrcReg,
|
|||||||
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1,
|
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1,
|
||||||
VR128:$src2))]>;
|
VR128:$src2))]>;
|
||||||
def Int_CVTSD2SSrm: SDI<0x5A, MRMSrcMem,
|
def Int_CVTSD2SSrm: SDI<0x5A, MRMSrcMem,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
|
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
|
||||||
"cvtsd2ss\t{$src2, $dst|$dst, $src2}",
|
"cvtsd2ss\t{$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1,
|
[(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1,
|
||||||
(load addr:$src2)))]>;
|
(load addr:$src2)))]>;
|
||||||
@ -1615,7 +1619,7 @@ multiclass sse2_fp_unop_rm<bits<8> opc, string OpcodeStr,
|
|||||||
def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
|
def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
|
||||||
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
|
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
|
||||||
[(set FR64:$dst, (OpNode (load addr:$src)))]>;
|
[(set FR64:$dst, (OpNode (load addr:$src)))]>;
|
||||||
|
|
||||||
// Vector operation, reg.
|
// Vector operation, reg.
|
||||||
def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||||
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
|
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
|
||||||
@ -1715,12 +1719,12 @@ let Constraints = "$src1 = $dst" in {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let Constraints = "$src1 = $dst" in {
|
let Constraints = "$src1 = $dst" in {
|
||||||
def CMPPDrri : PDIi8<0xC2, MRMSrcReg,
|
def CMPPDrri : PDIi8<0xC2, MRMSrcReg,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
|
(outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
|
||||||
"cmp${cc}pd\t{$src, $dst|$dst, $src}",
|
"cmp${cc}pd\t{$src, $dst|$dst, $src}",
|
||||||
[(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
|
[(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
|
||||||
VR128:$src, imm:$cc))]>;
|
VR128:$src, imm:$cc))]>;
|
||||||
def CMPPDrmi : PDIi8<0xC2, MRMSrcMem,
|
def CMPPDrmi : PDIi8<0xC2, MRMSrcMem,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc),
|
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc),
|
||||||
"cmp${cc}pd\t{$src, $dst|$dst, $src}",
|
"cmp${cc}pd\t{$src, $dst|$dst, $src}",
|
||||||
[(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
|
[(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
|
||||||
@ -1733,12 +1737,12 @@ def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
|
|||||||
|
|
||||||
// Shuffle and unpack instructions
|
// Shuffle and unpack instructions
|
||||||
let Constraints = "$src1 = $dst" in {
|
let Constraints = "$src1 = $dst" in {
|
||||||
def SHUFPDrri : PDIi8<0xC6, MRMSrcReg,
|
def SHUFPDrri : PDIi8<0xC6, MRMSrcReg,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3),
|
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3),
|
||||||
"shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
|
"shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(v2f64 (shufp:$src3 VR128:$src1, VR128:$src2)))]>;
|
(v2f64 (shufp:$src3 VR128:$src1, VR128:$src2)))]>;
|
||||||
def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem,
|
def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem,
|
||||||
(outs VR128:$dst), (ins VR128:$src1,
|
(outs VR128:$dst), (ins VR128:$src1,
|
||||||
f128mem:$src2, i8imm:$src3),
|
f128mem:$src2, i8imm:$src3),
|
||||||
"shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
|
"shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
|
||||||
@ -1747,24 +1751,24 @@ let Constraints = "$src1 = $dst" in {
|
|||||||
VR128:$src1, (memopv2f64 addr:$src2))))]>;
|
VR128:$src1, (memopv2f64 addr:$src2))))]>;
|
||||||
|
|
||||||
let AddedComplexity = 10 in {
|
let AddedComplexity = 10 in {
|
||||||
def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
|
def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||||
"unpckhpd\t{$src2, $dst|$dst, $src2}",
|
"unpckhpd\t{$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(v2f64 (unpckh VR128:$src1, VR128:$src2)))]>;
|
(v2f64 (unpckh VR128:$src1, VR128:$src2)))]>;
|
||||||
def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
|
def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
|
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
|
||||||
"unpckhpd\t{$src2, $dst|$dst, $src2}",
|
"unpckhpd\t{$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(v2f64 (unpckh VR128:$src1,
|
(v2f64 (unpckh VR128:$src1,
|
||||||
(memopv2f64 addr:$src2))))]>;
|
(memopv2f64 addr:$src2))))]>;
|
||||||
|
|
||||||
def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
|
def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||||
"unpcklpd\t{$src2, $dst|$dst, $src2}",
|
"unpcklpd\t{$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(v2f64 (unpckl VR128:$src1, VR128:$src2)))]>;
|
(v2f64 (unpckl VR128:$src1, VR128:$src2)))]>;
|
||||||
def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
|
def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
|
(outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
|
||||||
"unpcklpd\t{$src2, $dst|$dst, $src2}",
|
"unpcklpd\t{$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
@ -1773,7 +1777,7 @@ let Constraints = "$src1 = $dst" in {
|
|||||||
} // Constraints = "$src1 = $dst"
|
} // Constraints = "$src1 = $dst"
|
||||||
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===---------------------------------------------------------------------===//
|
||||||
// SSE integer instructions
|
// SSE integer instructions
|
||||||
|
|
||||||
// Move Instructions
|
// Move Instructions
|
||||||
@ -1828,14 +1832,17 @@ multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
|
|||||||
multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
|
multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
|
||||||
string OpcodeStr,
|
string OpcodeStr,
|
||||||
Intrinsic IntId, Intrinsic IntId2> {
|
Intrinsic IntId, Intrinsic IntId2> {
|
||||||
def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1,
|
||||||
|
VR128:$src2),
|
||||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||||
[(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
|
[(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
|
||||||
def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
|
def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1,
|
||||||
|
i128mem:$src2),
|
||||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||||
[(set VR128:$dst, (IntId VR128:$src1,
|
[(set VR128:$dst, (IntId VR128:$src1,
|
||||||
(bitconvert (memopv2i64 addr:$src2))))]>;
|
(bitconvert (memopv2i64 addr:$src2))))]>;
|
||||||
def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
|
def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst), (ins VR128:$src1,
|
||||||
|
i32i8imm:$src2),
|
||||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||||
[(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>;
|
[(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>;
|
||||||
}
|
}
|
||||||
@ -1843,15 +1850,17 @@ multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
|
|||||||
/// PDI_binop_rm - Simple SSE2 binary operator.
|
/// PDI_binop_rm - Simple SSE2 binary operator.
|
||||||
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||||
ValueType OpVT, bit Commutable = 0> {
|
ValueType OpVT, bit Commutable = 0> {
|
||||||
def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1,
|
||||||
|
VR128:$src2),
|
||||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||||
[(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]> {
|
[(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]> {
|
||||||
let isCommutable = Commutable;
|
let isCommutable = Commutable;
|
||||||
}
|
}
|
||||||
def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
|
def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1,
|
||||||
|
i128mem:$src2),
|
||||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||||
[(set VR128:$dst, (OpVT (OpNode VR128:$src1,
|
[(set VR128:$dst, (OpVT (OpNode VR128:$src1,
|
||||||
(bitconvert (memopv2i64 addr:$src2)))))]>;
|
(bitconvert (memopv2i64 addr:$src2)))))]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// PDI_binop_rm_v2i64 - Simple SSE2 binary operator whose type is v2i64.
|
/// PDI_binop_rm_v2i64 - Simple SSE2 binary operator whose type is v2i64.
|
||||||
@ -1861,14 +1870,17 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||||||
///
|
///
|
||||||
multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||||
bit Commutable = 0> {
|
bit Commutable = 0> {
|
||||||
def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
|
||||||
|
(ins VR128:$src1, VR128:$src2),
|
||||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||||
[(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]> {
|
[(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]> {
|
||||||
let isCommutable = Commutable;
|
let isCommutable = Commutable;
|
||||||
}
|
}
|
||||||
def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
|
def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
|
||||||
|
(ins VR128:$src1, i128mem:$src2),
|
||||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||||
[(set VR128:$dst, (OpNode VR128:$src1,(memopv2i64 addr:$src2)))]>;
|
[(set VR128:$dst, (OpNode VR128:$src1,
|
||||||
|
(memopv2i64 addr:$src2)))]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // Constraints = "$src1 = $dst"
|
} // Constraints = "$src1 = $dst"
|
||||||
@ -2032,8 +2044,8 @@ def PSHUFDmi : PDIi8<0x70, MRMSrcMem,
|
|||||||
(outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
|
(outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
|
||||||
"pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
"pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||||
[(set VR128:$dst, (v4i32 (pshufd:$src2
|
[(set VR128:$dst, (v4i32 (pshufd:$src2
|
||||||
(bc_v4i32(memopv2i64 addr:$src1)),
|
(bc_v4i32(memopv2i64 addr:$src1)),
|
||||||
(undef))))]>;
|
(undef))))]>;
|
||||||
|
|
||||||
// SSE2 with ImmT == Imm8 and XS prefix.
|
// SSE2 with ImmT == Imm8 and XS prefix.
|
||||||
def PSHUFHWri : Ii8<0x70, MRMSrcReg,
|
def PSHUFHWri : Ii8<0x70, MRMSrcReg,
|
||||||
@ -2046,8 +2058,8 @@ def PSHUFHWmi : Ii8<0x70, MRMSrcMem,
|
|||||||
(outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
|
(outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
|
||||||
"pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
"pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||||
[(set VR128:$dst, (v8i16 (pshufhw:$src2
|
[(set VR128:$dst, (v8i16 (pshufhw:$src2
|
||||||
(bc_v8i16 (memopv2i64 addr:$src1)),
|
(bc_v8i16 (memopv2i64 addr:$src1)),
|
||||||
(undef))))]>,
|
(undef))))]>,
|
||||||
XS, Requires<[HasSSE2]>;
|
XS, Requires<[HasSSE2]>;
|
||||||
|
|
||||||
// SSE2 with ImmT == Imm8 and XD prefix.
|
// SSE2 with ImmT == Imm8 and XD prefix.
|
||||||
@ -2067,90 +2079,90 @@ def PSHUFLWmi : Ii8<0x70, MRMSrcMem,
|
|||||||
|
|
||||||
|
|
||||||
let Constraints = "$src1 = $dst" in {
|
let Constraints = "$src1 = $dst" in {
|
||||||
def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
|
def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||||
"punpcklbw\t{$src2, $dst|$dst, $src2}",
|
"punpcklbw\t{$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(v16i8 (unpckl VR128:$src1, VR128:$src2)))]>;
|
(v16i8 (unpckl VR128:$src1, VR128:$src2)))]>;
|
||||||
def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
|
def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
|
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
|
||||||
"punpcklbw\t{$src2, $dst|$dst, $src2}",
|
"punpcklbw\t{$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(unpckl VR128:$src1,
|
(unpckl VR128:$src1,
|
||||||
(bc_v16i8 (memopv2i64 addr:$src2))))]>;
|
(bc_v16i8 (memopv2i64 addr:$src2))))]>;
|
||||||
def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
|
def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||||
"punpcklwd\t{$src2, $dst|$dst, $src2}",
|
"punpcklwd\t{$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(v8i16 (unpckl VR128:$src1, VR128:$src2)))]>;
|
(v8i16 (unpckl VR128:$src1, VR128:$src2)))]>;
|
||||||
def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
|
def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
|
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
|
||||||
"punpcklwd\t{$src2, $dst|$dst, $src2}",
|
"punpcklwd\t{$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(unpckl VR128:$src1,
|
(unpckl VR128:$src1,
|
||||||
(bc_v8i16 (memopv2i64 addr:$src2))))]>;
|
(bc_v8i16 (memopv2i64 addr:$src2))))]>;
|
||||||
def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
|
def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||||
"punpckldq\t{$src2, $dst|$dst, $src2}",
|
"punpckldq\t{$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(v4i32 (unpckl VR128:$src1, VR128:$src2)))]>;
|
(v4i32 (unpckl VR128:$src1, VR128:$src2)))]>;
|
||||||
def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
|
def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
|
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
|
||||||
"punpckldq\t{$src2, $dst|$dst, $src2}",
|
"punpckldq\t{$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(unpckl VR128:$src1,
|
(unpckl VR128:$src1,
|
||||||
(bc_v4i32 (memopv2i64 addr:$src2))))]>;
|
(bc_v4i32 (memopv2i64 addr:$src2))))]>;
|
||||||
def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
|
def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||||
"punpcklqdq\t{$src2, $dst|$dst, $src2}",
|
"punpcklqdq\t{$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(v2i64 (unpckl VR128:$src1, VR128:$src2)))]>;
|
(v2i64 (unpckl VR128:$src1, VR128:$src2)))]>;
|
||||||
def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
|
def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
|
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
|
||||||
"punpcklqdq\t{$src2, $dst|$dst, $src2}",
|
"punpcklqdq\t{$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(v2i64 (unpckl VR128:$src1,
|
(v2i64 (unpckl VR128:$src1,
|
||||||
(memopv2i64 addr:$src2))))]>;
|
(memopv2i64 addr:$src2))))]>;
|
||||||
|
|
||||||
def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
|
def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||||
"punpckhbw\t{$src2, $dst|$dst, $src2}",
|
"punpckhbw\t{$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(v16i8 (unpckh VR128:$src1, VR128:$src2)))]>;
|
(v16i8 (unpckh VR128:$src1, VR128:$src2)))]>;
|
||||||
def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
|
def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
|
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
|
||||||
"punpckhbw\t{$src2, $dst|$dst, $src2}",
|
"punpckhbw\t{$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(unpckh VR128:$src1,
|
(unpckh VR128:$src1,
|
||||||
(bc_v16i8 (memopv2i64 addr:$src2))))]>;
|
(bc_v16i8 (memopv2i64 addr:$src2))))]>;
|
||||||
def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
|
def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||||
"punpckhwd\t{$src2, $dst|$dst, $src2}",
|
"punpckhwd\t{$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(v8i16 (unpckh VR128:$src1, VR128:$src2)))]>;
|
(v8i16 (unpckh VR128:$src1, VR128:$src2)))]>;
|
||||||
def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
|
def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
|
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
|
||||||
"punpckhwd\t{$src2, $dst|$dst, $src2}",
|
"punpckhwd\t{$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(unpckh VR128:$src1,
|
(unpckh VR128:$src1,
|
||||||
(bc_v8i16 (memopv2i64 addr:$src2))))]>;
|
(bc_v8i16 (memopv2i64 addr:$src2))))]>;
|
||||||
def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
|
def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||||
"punpckhdq\t{$src2, $dst|$dst, $src2}",
|
"punpckhdq\t{$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(v4i32 (unpckh VR128:$src1, VR128:$src2)))]>;
|
(v4i32 (unpckh VR128:$src1, VR128:$src2)))]>;
|
||||||
def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
|
def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
|
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
|
||||||
"punpckhdq\t{$src2, $dst|$dst, $src2}",
|
"punpckhdq\t{$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(unpckh VR128:$src1,
|
(unpckh VR128:$src1,
|
||||||
(bc_v4i32 (memopv2i64 addr:$src2))))]>;
|
(bc_v4i32 (memopv2i64 addr:$src2))))]>;
|
||||||
def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
|
def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||||
"punpckhqdq\t{$src2, $dst|$dst, $src2}",
|
"punpckhqdq\t{$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(v2i64 (unpckh VR128:$src1, VR128:$src2)))]>;
|
(v2i64 (unpckh VR128:$src1, VR128:$src2)))]>;
|
||||||
def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
|
def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
|
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
|
||||||
"punpckhqdq\t{$src2, $dst|$dst, $src2}",
|
"punpckhqdq\t{$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
@ -2175,7 +2187,7 @@ let Constraints = "$src1 = $dst" in {
|
|||||||
(outs VR128:$dst), (ins VR128:$src1,
|
(outs VR128:$dst), (ins VR128:$src1,
|
||||||
i16mem:$src2, i32i8imm:$src3),
|
i16mem:$src2, i32i8imm:$src3),
|
||||||
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
|
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
|
(X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
|
||||||
imm:$src3))]>;
|
imm:$src3))]>;
|
||||||
}
|
}
|
||||||
@ -2205,7 +2217,7 @@ def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
|
|||||||
[(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
|
[(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
|
||||||
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
|
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
|
||||||
"movnti\t{$src, $dst|$dst, $src}",
|
"movnti\t{$src, $dst|$dst, $src}",
|
||||||
[(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
|
[(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
|
||||||
TB, Requires<[HasSSE2]>;
|
TB, Requires<[HasSSE2]>;
|
||||||
|
|
||||||
// Flush cache
|
// Flush cache
|
||||||
@ -2220,11 +2232,11 @@ def MFENCE : I<0xAE, MRM6r, (outs), (ins),
|
|||||||
"mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
|
"mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
|
||||||
|
|
||||||
//TODO: custom lower this so as to never even generate the noop
|
//TODO: custom lower this so as to never even generate the noop
|
||||||
def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss),
|
def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss),
|
||||||
(i8 0)), (NOOP)>;
|
(i8 0)), (NOOP)>;
|
||||||
def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
|
def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
|
||||||
def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
|
def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
|
||||||
def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss),
|
def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss),
|
||||||
(i8 1)), (MFENCE)>;
|
(i8 1)), (MFENCE)>;
|
||||||
|
|
||||||
// Alias instructions that map zero vector to pxor / xorp* for sse.
|
// Alias instructions that map zero vector to pxor / xorp* for sse.
|
||||||
@ -2243,7 +2255,7 @@ def MOVSD2PDrr : SDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR64:$src),
|
|||||||
(v2f64 (scalar_to_vector FR64:$src)))]>;
|
(v2f64 (scalar_to_vector FR64:$src)))]>;
|
||||||
def MOVSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
|
def MOVSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
|
||||||
"movsd\t{$src, $dst|$dst, $src}",
|
"movsd\t{$src, $dst|$dst, $src}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;
|
(v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;
|
||||||
|
|
||||||
def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
|
def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
|
||||||
@ -2402,9 +2414,9 @@ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
|
|||||||
(MOVZPQILo2PQIrm addr:$src)>;
|
(MOVZPQILo2PQIrm addr:$src)>;
|
||||||
}
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===---------------------------------------------------------------------===//
|
||||||
// SSE3 Instructions
|
// SSE3 Instructions
|
||||||
//===----------------------------------------------------------------------===//
|
//===---------------------------------------------------------------------===//
|
||||||
|
|
||||||
// Move Instructions
|
// Move Instructions
|
||||||
def MOVSHDUPrr : S3SI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
def MOVSHDUPrr : S3SI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||||
@ -2528,9 +2540,9 @@ let AddedComplexity = 20 in
|
|||||||
def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
|
def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
|
||||||
(MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>;
|
(MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===---------------------------------------------------------------------===//
|
||||||
// SSSE3 Instructions
|
// SSSE3 Instructions
|
||||||
//===----------------------------------------------------------------------===//
|
//===---------------------------------------------------------------------===//
|
||||||
|
|
||||||
/// SS3I_unop_rm_int_8 - Simple SSSE3 unary operator whose type is v*i8.
|
/// SS3I_unop_rm_int_8 - Simple SSSE3 unary operator whose type is v*i8.
|
||||||
multiclass SS3I_unop_rm_int_8<bits<8> opc, string OpcodeStr,
|
multiclass SS3I_unop_rm_int_8<bits<8> opc, string OpcodeStr,
|
||||||
@ -2804,12 +2816,13 @@ def : Pat<(X86pshufb VR128:$src, VR128:$mask),
|
|||||||
def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
|
def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
|
||||||
(PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>;
|
(PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===---------------------------------------------------------------------===//
|
||||||
// Non-Instruction Patterns
|
// Non-Instruction Patterns
|
||||||
//===----------------------------------------------------------------------===//
|
//===---------------------------------------------------------------------===//
|
||||||
|
|
||||||
// extload f32 -> f64. This matches load+fextend because we have a hack in
|
// extload f32 -> f64. This matches load+fextend because we have a hack in
|
||||||
// the isel (PreprocessForFPConvert) that can introduce loads after dag combine.
|
// the isel (PreprocessForFPConvert) that can introduce loads after dag
|
||||||
|
// combine.
|
||||||
// Since these loads aren't folded into the fextend, we have to match it
|
// Since these loads aren't folded into the fextend, we have to match it
|
||||||
// explicitly here.
|
// explicitly here.
|
||||||
let Predicates = [HasSSE2] in
|
let Predicates = [HasSSE2] in
|
||||||
@ -2887,12 +2900,12 @@ def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))),
|
|||||||
Requires<[HasSSE2]>;
|
Requires<[HasSSE2]>;
|
||||||
// Special unary SHUFPDrri case.
|
// Special unary SHUFPDrri case.
|
||||||
def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))),
|
def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))),
|
||||||
(SHUFPDrri VR128:$src1, VR128:$src1,
|
(SHUFPDrri VR128:$src1, VR128:$src1,
|
||||||
(SHUFFLE_get_shuf_imm VR128:$src3))>,
|
(SHUFFLE_get_shuf_imm VR128:$src3))>,
|
||||||
Requires<[HasSSE2]>;
|
Requires<[HasSSE2]>;
|
||||||
// Special unary SHUFPDrri case.
|
// Special unary SHUFPDrri case.
|
||||||
def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))),
|
def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))),
|
||||||
(SHUFPDrri VR128:$src1, VR128:$src1,
|
(SHUFPDrri VR128:$src1, VR128:$src1,
|
||||||
(SHUFFLE_get_shuf_imm VR128:$src3))>,
|
(SHUFFLE_get_shuf_imm VR128:$src3))>,
|
||||||
Requires<[HasSSE2]>;
|
Requires<[HasSSE2]>;
|
||||||
// Unary v4f32 shuffle with PSHUF* in order to fold a load.
|
// Unary v4f32 shuffle with PSHUF* in order to fold a load.
|
||||||
@ -2902,16 +2915,16 @@ def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)),
|
|||||||
|
|
||||||
// Special binary v4i32 shuffle cases with SHUFPS.
|
// Special binary v4i32 shuffle cases with SHUFPS.
|
||||||
def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))),
|
def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))),
|
||||||
(SHUFPSrri VR128:$src1, VR128:$src2,
|
(SHUFPSrri VR128:$src1, VR128:$src2,
|
||||||
(SHUFFLE_get_shuf_imm VR128:$src3))>,
|
(SHUFFLE_get_shuf_imm VR128:$src3))>,
|
||||||
Requires<[HasSSE2]>;
|
Requires<[HasSSE2]>;
|
||||||
def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
|
def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)))),
|
||||||
(SHUFPSrmi VR128:$src1, addr:$src2,
|
(SHUFPSrmi VR128:$src1, addr:$src2,
|
||||||
(SHUFFLE_get_shuf_imm VR128:$src3))>,
|
(SHUFFLE_get_shuf_imm VR128:$src3))>,
|
||||||
Requires<[HasSSE2]>;
|
Requires<[HasSSE2]>;
|
||||||
// Special binary v2i64 shuffle cases using SHUFPDrri.
|
// Special binary v2i64 shuffle cases using SHUFPDrri.
|
||||||
def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)),
|
def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)),
|
||||||
(SHUFPDrri VR128:$src1, VR128:$src2,
|
(SHUFPDrri VR128:$src1, VR128:$src2,
|
||||||
(SHUFFLE_get_shuf_imm VR128:$src3))>,
|
(SHUFFLE_get_shuf_imm VR128:$src3))>,
|
||||||
Requires<[HasSSE2]>;
|
Requires<[HasSSE2]>;
|
||||||
|
|
||||||
@ -3033,7 +3046,7 @@ def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
|
|||||||
// vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but
|
// vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but
|
||||||
// fall back to this for SSE1)
|
// fall back to this for SSE1)
|
||||||
def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
|
def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
|
||||||
(SHUFPSrri VR128:$src2, VR128:$src1,
|
(SHUFPSrri VR128:$src2, VR128:$src1,
|
||||||
(SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE1]>;
|
(SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE1]>;
|
||||||
|
|
||||||
// Set lowest element and zero upper elements.
|
// Set lowest element and zero upper elements.
|
||||||
@ -3100,7 +3113,7 @@ def : Pat<(store (v8i16 VR128:$src), addr:$dst),
|
|||||||
(MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
|
(MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
|
||||||
def : Pat<(store (v16i8 VR128:$src), addr:$dst),
|
def : Pat<(store (v16i8 VR128:$src), addr:$dst),
|
||||||
(MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
|
(MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// SSE4.1 Instructions
|
// SSE4.1 Instructions
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
@ -3111,7 +3124,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd,
|
|||||||
Intrinsic V2F64Int> {
|
Intrinsic V2F64Int> {
|
||||||
// Intrinsic operation, reg.
|
// Intrinsic operation, reg.
|
||||||
// Vector intrinsic operation, reg
|
// Vector intrinsic operation, reg
|
||||||
def PSr_Int : SS4AIi8<opcps, MRMSrcReg,
|
def PSr_Int : SS4AIi8<opcps, MRMSrcReg,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
|
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
|
||||||
!strconcat(OpcodeStr,
|
!strconcat(OpcodeStr,
|
||||||
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||||
@ -3152,41 +3165,41 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
|
|||||||
Intrinsic F64Int> {
|
Intrinsic F64Int> {
|
||||||
// Intrinsic operation, reg.
|
// Intrinsic operation, reg.
|
||||||
def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
|
def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
|
||||||
(outs VR128:$dst),
|
(outs VR128:$dst),
|
||||||
(ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
|
(ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
|
||||||
!strconcat(OpcodeStr,
|
!strconcat(OpcodeStr,
|
||||||
"ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
"ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(F32Int VR128:$src1, VR128:$src2, imm:$src3))]>,
|
(F32Int VR128:$src1, VR128:$src2, imm:$src3))]>,
|
||||||
OpSize;
|
OpSize;
|
||||||
|
|
||||||
// Intrinsic operation, mem.
|
// Intrinsic operation, mem.
|
||||||
def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
|
def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
|
||||||
(outs VR128:$dst),
|
(outs VR128:$dst),
|
||||||
(ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
|
(ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
|
||||||
!strconcat(OpcodeStr,
|
!strconcat(OpcodeStr,
|
||||||
"ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
"ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
|
(F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
|
||||||
OpSize;
|
OpSize;
|
||||||
|
|
||||||
// Intrinsic operation, reg.
|
// Intrinsic operation, reg.
|
||||||
def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
|
def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
|
||||||
(outs VR128:$dst),
|
(outs VR128:$dst),
|
||||||
(ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
|
(ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
|
||||||
!strconcat(OpcodeStr,
|
!strconcat(OpcodeStr,
|
||||||
"sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
"sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(F64Int VR128:$src1, VR128:$src2, imm:$src3))]>,
|
(F64Int VR128:$src1, VR128:$src2, imm:$src3))]>,
|
||||||
OpSize;
|
OpSize;
|
||||||
|
|
||||||
// Intrinsic operation, mem.
|
// Intrinsic operation, mem.
|
||||||
def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
|
def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
|
||||||
(outs VR128:$dst),
|
(outs VR128:$dst),
|
||||||
(ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
|
(ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
|
||||||
!strconcat(OpcodeStr,
|
!strconcat(OpcodeStr,
|
||||||
"sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
"sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
|
(F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
|
||||||
OpSize;
|
OpSize;
|
||||||
}
|
}
|
||||||
@ -3305,9 +3318,9 @@ let Constraints = "$src1 = $dst" in {
|
|||||||
Intrinsic IntId128, bit Commutable = 0> {
|
Intrinsic IntId128, bit Commutable = 0> {
|
||||||
def rri : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
|
def rri : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
|
||||||
(ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
|
(ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
|
||||||
!strconcat(OpcodeStr,
|
!strconcat(OpcodeStr,
|
||||||
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(IntId128 VR128:$src1, VR128:$src2, imm:$src3))]>,
|
(IntId128 VR128:$src1, VR128:$src2, imm:$src3))]>,
|
||||||
OpSize {
|
OpSize {
|
||||||
let isCommutable = Commutable;
|
let isCommutable = Commutable;
|
||||||
@ -3342,7 +3355,7 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in {
|
|||||||
multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
|
multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
|
||||||
def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
|
def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
|
||||||
(ins VR128:$src1, VR128:$src2),
|
(ins VR128:$src1, VR128:$src2),
|
||||||
!strconcat(OpcodeStr,
|
!strconcat(OpcodeStr,
|
||||||
"\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"),
|
"\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"),
|
||||||
[(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))]>,
|
[(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))]>,
|
||||||
OpSize;
|
OpSize;
|
||||||
@ -3474,13 +3487,13 @@ def : Pat<(int_x86_sse41_pmovzxbq
|
|||||||
multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
|
multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
|
||||||
def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
|
def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
|
||||||
(ins VR128:$src1, i32i8imm:$src2),
|
(ins VR128:$src1, i32i8imm:$src2),
|
||||||
!strconcat(OpcodeStr,
|
!strconcat(OpcodeStr,
|
||||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||||
[(set GR32:$dst, (X86pextrb (v16i8 VR128:$src1), imm:$src2))]>,
|
[(set GR32:$dst, (X86pextrb (v16i8 VR128:$src1), imm:$src2))]>,
|
||||||
OpSize;
|
OpSize;
|
||||||
def mr : SS4AIi8<opc, MRMDestMem, (outs),
|
def mr : SS4AIi8<opc, MRMDestMem, (outs),
|
||||||
(ins i8mem:$dst, VR128:$src1, i32i8imm:$src2),
|
(ins i8mem:$dst, VR128:$src1, i32i8imm:$src2),
|
||||||
!strconcat(OpcodeStr,
|
!strconcat(OpcodeStr,
|
||||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||||
[]>, OpSize;
|
[]>, OpSize;
|
||||||
// FIXME:
|
// FIXME:
|
||||||
@ -3495,7 +3508,7 @@ defm PEXTRB : SS41I_extract8<0x14, "pextrb">;
|
|||||||
multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
|
multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
|
||||||
def mr : SS4AIi8<opc, MRMDestMem, (outs),
|
def mr : SS4AIi8<opc, MRMDestMem, (outs),
|
||||||
(ins i16mem:$dst, VR128:$src1, i32i8imm:$src2),
|
(ins i16mem:$dst, VR128:$src1, i32i8imm:$src2),
|
||||||
!strconcat(OpcodeStr,
|
!strconcat(OpcodeStr,
|
||||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||||
[]>, OpSize;
|
[]>, OpSize;
|
||||||
// FIXME:
|
// FIXME:
|
||||||
@ -3510,13 +3523,13 @@ defm PEXTRW : SS41I_extract16<0x15, "pextrw">;
|
|||||||
multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
|
multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
|
||||||
def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
|
def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
|
||||||
(ins VR128:$src1, i32i8imm:$src2),
|
(ins VR128:$src1, i32i8imm:$src2),
|
||||||
!strconcat(OpcodeStr,
|
!strconcat(OpcodeStr,
|
||||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||||
[(set GR32:$dst,
|
[(set GR32:$dst,
|
||||||
(extractelt (v4i32 VR128:$src1), imm:$src2))]>, OpSize;
|
(extractelt (v4i32 VR128:$src1), imm:$src2))]>, OpSize;
|
||||||
def mr : SS4AIi8<opc, MRMDestMem, (outs),
|
def mr : SS4AIi8<opc, MRMDestMem, (outs),
|
||||||
(ins i32mem:$dst, VR128:$src1, i32i8imm:$src2),
|
(ins i32mem:$dst, VR128:$src1, i32i8imm:$src2),
|
||||||
!strconcat(OpcodeStr,
|
!strconcat(OpcodeStr,
|
||||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||||
[(store (extractelt (v4i32 VR128:$src1), imm:$src2),
|
[(store (extractelt (v4i32 VR128:$src1), imm:$src2),
|
||||||
addr:$dst)]>, OpSize;
|
addr:$dst)]>, OpSize;
|
||||||
@ -3530,14 +3543,14 @@ defm PEXTRD : SS41I_extract32<0x16, "pextrd">;
|
|||||||
multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
|
multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
|
||||||
def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
|
def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
|
||||||
(ins VR128:$src1, i32i8imm:$src2),
|
(ins VR128:$src1, i32i8imm:$src2),
|
||||||
!strconcat(OpcodeStr,
|
!strconcat(OpcodeStr,
|
||||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||||
[(set GR32:$dst,
|
[(set GR32:$dst,
|
||||||
(extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
|
(extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
|
||||||
OpSize;
|
OpSize;
|
||||||
def mr : SS4AIi8<opc, MRMDestMem, (outs),
|
def mr : SS4AIi8<opc, MRMDestMem, (outs),
|
||||||
(ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
|
(ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
|
||||||
!strconcat(OpcodeStr,
|
!strconcat(OpcodeStr,
|
||||||
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||||
[(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
|
[(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
|
||||||
addr:$dst)]>, OpSize;
|
addr:$dst)]>, OpSize;
|
||||||
@ -3556,15 +3569,15 @@ let Constraints = "$src1 = $dst" in {
|
|||||||
multiclass SS41I_insert8<bits<8> opc, string OpcodeStr> {
|
multiclass SS41I_insert8<bits<8> opc, string OpcodeStr> {
|
||||||
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
|
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
|
||||||
(ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
|
(ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
|
||||||
!strconcat(OpcodeStr,
|
!strconcat(OpcodeStr,
|
||||||
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(X86pinsrb VR128:$src1, GR32:$src2, imm:$src3))]>, OpSize;
|
(X86pinsrb VR128:$src1, GR32:$src2, imm:$src3))]>, OpSize;
|
||||||
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
|
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
|
||||||
(ins VR128:$src1, i8mem:$src2, i32i8imm:$src3),
|
(ins VR128:$src1, i8mem:$src2, i32i8imm:$src3),
|
||||||
!strconcat(OpcodeStr,
|
!strconcat(OpcodeStr,
|
||||||
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
|
(X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
|
||||||
imm:$src3))]>, OpSize;
|
imm:$src3))]>, OpSize;
|
||||||
}
|
}
|
||||||
@ -3576,16 +3589,16 @@ let Constraints = "$src1 = $dst" in {
|
|||||||
multiclass SS41I_insert32<bits<8> opc, string OpcodeStr> {
|
multiclass SS41I_insert32<bits<8> opc, string OpcodeStr> {
|
||||||
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
|
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
|
||||||
(ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
|
(ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
|
||||||
!strconcat(OpcodeStr,
|
!strconcat(OpcodeStr,
|
||||||
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
|
(v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
|
||||||
OpSize;
|
OpSize;
|
||||||
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
|
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
|
||||||
(ins VR128:$src1, i32mem:$src2, i32i8imm:$src3),
|
(ins VR128:$src1, i32mem:$src2, i32i8imm:$src3),
|
||||||
!strconcat(OpcodeStr,
|
!strconcat(OpcodeStr,
|
||||||
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2),
|
(v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2),
|
||||||
imm:$src3)))]>, OpSize;
|
imm:$src3)))]>, OpSize;
|
||||||
}
|
}
|
||||||
@ -3601,15 +3614,16 @@ let Constraints = "$src1 = $dst" in {
|
|||||||
multiclass SS41I_insertf32<bits<8> opc, string OpcodeStr> {
|
multiclass SS41I_insertf32<bits<8> opc, string OpcodeStr> {
|
||||||
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
|
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
|
||||||
(ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
|
(ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
|
||||||
!strconcat(OpcodeStr,
|
!strconcat(OpcodeStr,
|
||||||
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>, OpSize;
|
(X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>,
|
||||||
|
OpSize;
|
||||||
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
|
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
|
||||||
(ins VR128:$src1, f32mem:$src2, i32i8imm:$src3),
|
(ins VR128:$src1, f32mem:$src2, i32i8imm:$src3),
|
||||||
!strconcat(OpcodeStr,
|
!strconcat(OpcodeStr,
|
||||||
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(X86insrtps VR128:$src1,
|
(X86insrtps VR128:$src1,
|
||||||
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
|
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
|
||||||
imm:$src3))]>, OpSize;
|
imm:$src3))]>, OpSize;
|
||||||
|
Loading…
Reference in New Issue
Block a user