X86: add more GATHER intrinsics in LLVM

Corrected type for index of llvm.x86.avx2.gather.d.pd.256
  from 256-bit to 128-bit.
Corrected types for src|dst|mask of llvm.x86.avx2.gather.q.ps.256
  from 256-bit to 128-bit.

Support the following intrinsics:
  llvm.x86.avx2.gather.d.q, llvm.x86.avx2.gather.q.q
  llvm.x86.avx2.gather.d.q.256, llvm.x86.avx2.gather.q.q.256
  llvm.x86.avx2.gather.d.d, llvm.x86.avx2.gather.q.d
  llvm.x86.avx2.gather.d.d.256, llvm.x86.avx2.gather.q.d.256


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@159402 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Manman Ren 2012-06-29 00:54:20 +00:00
parent cfc49bfd3f
commit 40307c7dbe
8 changed files with 231 additions and 39 deletions

View File

@ -1752,7 +1752,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
[IntrReadMem]>;
def int_x86_avx2_gather_d_pd_256 : GCCBuiltin<"__builtin_ia32_gatherd_pd256">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v4f64_ty, llvm_i8_ty],
[llvm_v4f64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4f64_ty, llvm_i8_ty],
[IntrReadMem]>;
def int_x86_avx2_gather_q_pd : GCCBuiltin<"__builtin_ia32_gatherq_pd">,
Intrinsic<[llvm_v2f64_ty],
@ -1775,8 +1775,41 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
[llvm_v4f32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty],
[IntrReadMem]>;
def int_x86_avx2_gather_q_ps_256 : GCCBuiltin<"__builtin_ia32_gatherq_ps256">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v8f32_ty, llvm_i8_ty],
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4f32_ty, llvm_i8_ty],
[IntrReadMem]>;
def int_x86_avx2_gather_d_q : GCCBuiltin<"__builtin_ia32_gatherd_q">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v2i64_ty, llvm_i8_ty],
[IntrReadMem]>;
def int_x86_avx2_gather_d_q_256 : GCCBuiltin<"__builtin_ia32_gatherd_q256">,
Intrinsic<[llvm_v4i64_ty],
[llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i64_ty, llvm_i8_ty],
[IntrReadMem]>;
def int_x86_avx2_gather_q_q : GCCBuiltin<"__builtin_ia32_gatherq_q">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
[IntrReadMem]>;
def int_x86_avx2_gather_q_q_256 : GCCBuiltin<"__builtin_ia32_gatherq_q256">,
Intrinsic<[llvm_v4i64_ty],
[llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
[IntrReadMem]>;
def int_x86_avx2_gather_d_d : GCCBuiltin<"__builtin_ia32_gatherd_d">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrReadMem]>;
def int_x86_avx2_gather_d_d_256 : GCCBuiltin<"__builtin_ia32_gatherd_d256">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
[IntrReadMem]>;
def int_x86_avx2_gather_q_d : GCCBuiltin<"__builtin_ia32_gatherq_d">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrReadMem]>;
def int_x86_avx2_gather_q_d_256 : GCCBuiltin<"__builtin_ia32_gatherq_d256">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrReadMem]>;
}

View File

@ -506,18 +506,26 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
// We can tell whether it is VSIB or SIB after instruction ID is decoded,
// but instruction ID may not be decoded yet when calling readSIB.
uint32_t Opcode = mcInst.getOpcode();
bool IsGather = (Opcode == X86::VGATHERDPDrm ||
Opcode == X86::VGATHERQPDrm ||
Opcode == X86::VGATHERDPSrm ||
Opcode == X86::VGATHERQPSrm);
bool IsGatherY = (Opcode == X86::VGATHERDPDYrm ||
Opcode == X86::VGATHERQPDYrm ||
Opcode == X86::VGATHERDPSYrm ||
Opcode == X86::VGATHERQPSYrm);
if (IsGather || IsGatherY) {
bool IndexIs128 = (Opcode == X86::VGATHERDPDrm ||
Opcode == X86::VGATHERDPDYrm ||
Opcode == X86::VGATHERQPDrm ||
Opcode == X86::VGATHERDPSrm ||
Opcode == X86::VGATHERQPSrm ||
Opcode == X86::VPGATHERDQrm ||
Opcode == X86::VPGATHERDQYrm ||
Opcode == X86::VPGATHERQQrm ||
Opcode == X86::VPGATHERDDrm ||
Opcode == X86::VPGATHERQDrm);
bool IndexIs256 = (Opcode == X86::VGATHERQPDYrm ||
Opcode == X86::VGATHERDPSYrm ||
Opcode == X86::VGATHERQPSYrm ||
Opcode == X86::VPGATHERQQYrm ||
Opcode == X86::VPGATHERDDYrm ||
Opcode == X86::VPGATHERQDYrm);
if (IndexIs128 || IndexIs256) {
unsigned IndexOffset = insn.sibIndex -
(insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX);
SIBIndex IndexBase = IsGatherY ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0;
SIBIndex IndexBase = IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0;
insn.sibIndex = (SIBIndex)(IndexBase +
(insn.sibIndex == SIB_INDEX_NONE ? 4 : IndexOffset));
}

View File

@ -2011,6 +2011,22 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
return SelectGather(Node, X86::VGATHERQPSrm);
case Intrinsic::x86_avx2_gather_q_ps_256:
return SelectGather(Node, X86::VGATHERQPSYrm);
case Intrinsic::x86_avx2_gather_d_q:
return SelectGather(Node, X86::VPGATHERDQrm);
case Intrinsic::x86_avx2_gather_d_q_256:
return SelectGather(Node, X86::VPGATHERDQYrm);
case Intrinsic::x86_avx2_gather_q_q:
return SelectGather(Node, X86::VPGATHERQQrm);
case Intrinsic::x86_avx2_gather_q_q_256:
return SelectGather(Node, X86::VPGATHERQQYrm);
case Intrinsic::x86_avx2_gather_d_d:
return SelectGather(Node, X86::VPGATHERDDrm);
case Intrinsic::x86_avx2_gather_d_d_256:
return SelectGather(Node, X86::VPGATHERDDYrm);
case Intrinsic::x86_avx2_gather_q_d:
return SelectGather(Node, X86::VPGATHERQDrm);
case Intrinsic::x86_avx2_gather_q_d_256:
return SelectGather(Node, X86::VPGATHERQDYrm);
}
break;
}

View File

@ -325,12 +325,10 @@ def f128mem : X86MemOperand<"printf128mem"> {
let ParserMatchClass = X86Mem128AsmOperand; }
def f256mem : X86MemOperand<"printf256mem">{
let ParserMatchClass = X86Mem256AsmOperand; }
def v128mem : Operand<iPTR> {
let PrintMethod = "printf128mem";
def v128mem : X86MemOperand<"printf128mem"> {
let MIOperandInfo = (ops ptr_rc, i8imm, VR128, i32imm, i8imm);
let ParserMatchClass = X86Mem128AsmOperand; }
def v256mem : Operand<iPTR> {
let PrintMethod = "printf256mem";
def v256mem : X86MemOperand<"printf256mem"> {
let MIOperandInfo = (ops ptr_rc, i8imm, VR256, i32imm, i8imm);
let ParserMatchClass = X86Mem256AsmOperand; }
}

View File

@ -7997,37 +7997,52 @@ defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>;
//===----------------------------------------------------------------------===//
// VGATHER - GATHER Operations
//
// [(set VR128:$dst, (IntGather128 VR128:$src1, addr:$src2, VR128:$idx,
// VR128:$mask, (i8 imm:$sc)))]>, VEX_4VOp3;
// [(set VR256:$dst, (IntGather256 VR256:$src1, addr:$src2, VR256:$idx,
// VR256:$mask, (i8 imm:$sc)))]>, VEX_4VOp3;
multiclass avx2_gather<bits<8> opc, string OpcodeStr,
RegisterClass RC256, X86MemOperand memop256,
Intrinsic IntGather128, Intrinsic IntGather256> {
def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, v128mem:$src2, VR128:$mask),
!strconcat(OpcodeStr,
"\t{$src1, $src2, $mask|$mask, $src2, $src1}"),
[]>, VEX_4VOp3;
def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, v256mem:$src2, VR256:$mask),
def Yrm : AVX28I<opc, MRMSrcMem, (outs RC256:$dst),
(ins RC256:$src1, memop256:$src2, RC256:$mask),
!strconcat(OpcodeStr,
"\t{$src1, $src2, $mask|$mask, $src2, $src1}"),
[]>, VEX_4VOp3;
[]>, VEX_4VOp3, VEX_L;
}
//let Constraints = "$src1 = $dst, $mask = $mask_wb" in {
let Constraints = "$src1 = $dst" in {
defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd",
VR256, v128mem,
int_x86_avx2_gather_d_pd,
int_x86_avx2_gather_d_pd_256>, VEX_W;
defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd",
VR256, v256mem,
int_x86_avx2_gather_q_pd,
int_x86_avx2_gather_q_pd_256>, VEX_W;
defm VGATHERDPS : avx2_gather<0x92, "vgatherdps",
VR256, v256mem,
int_x86_avx2_gather_d_ps,
int_x86_avx2_gather_d_ps_256>;
defm VGATHERQPS : avx2_gather<0x93, "vgatherqps",
VR128, v256mem,
int_x86_avx2_gather_q_ps,
int_x86_avx2_gather_q_ps_256>;
defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq",
VR256, v128mem,
int_x86_avx2_gather_d_q,
int_x86_avx2_gather_d_q_256>, VEX_W;
defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq",
VR256, v256mem,
int_x86_avx2_gather_q_q,
int_x86_avx2_gather_q_q_256>, VEX_W;
defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd",
VR256, v256mem,
int_x86_avx2_gather_d_d,
int_x86_avx2_gather_d_d_256>;
defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd",
VR128, v256mem,
int_x86_avx2_gather_q_d,
int_x86_avx2_gather_q_d_256>;
}

View File

@ -988,14 +988,14 @@ declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*,
<4 x i32>, <2 x double>, i8) nounwind readonly
define <4 x double> @test_x86_avx2_gather_d_pd_256(<4 x double> %a0, i8* %a1,
<8 x i32> %idx, <4 x double> %mask) {
<4 x i32> %idx, <4 x double> %mask) {
; CHECK: vgatherdpd
%res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0,
i8* %a1, <8 x i32> %idx, <4 x double> %mask, i8 2) ;
i8* %a1, <4 x i32> %idx, <4 x double> %mask, i8 2) ;
ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*,
<8 x i32>, <4 x double>, i8) nounwind readonly
<4 x i32>, <4 x double>, i8) nounwind readonly
define <2 x double> @test_x86_avx2_gather_q_pd(<2 x double> %a0, i8* %a1,
<2 x i64> %idx, <2 x double> %mask) {
@ -1047,12 +1047,92 @@ define <4 x float> @test_x86_avx2_gather_q_ps(<4 x float> %a0, i8* %a1,
declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*,
<2 x i64>, <4 x float>, i8) nounwind readonly
define <8 x float> @test_x86_avx2_gather_q_ps_256(<8 x float> %a0, i8* %a1,
<4 x i64> %idx, <8 x float> %mask) {
define <4 x float> @test_x86_avx2_gather_q_ps_256(<4 x float> %a0, i8* %a1,
<4 x i64> %idx, <4 x float> %mask) {
; CHECK: vgatherqps
%res = call <8 x float> @llvm.x86.avx2.gather.q.ps.256(<8 x float> %a0,
i8* %a1, <4 x i64> %idx, <8 x float> %mask, i8 2) ;
ret <8 x float> %res
%res = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0,
i8* %a1, <4 x i64> %idx, <4 x float> %mask, i8 2) ;
ret <4 x float> %res
}
declare <8 x float> @llvm.x86.avx2.gather.q.ps.256(<8 x float>, i8*,
<4 x i64>, <8 x float>, i8) nounwind readonly
declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, i8*,
<4 x i64>, <4 x float>, i8) nounwind readonly
define <2 x i64> @test_x86_avx2_gather_d_q(<2 x i64> %a0, i8* %a1,
<4 x i32> %idx, <2 x i64> %mask) {
; CHECK: vpgatherdq
%res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0,
i8* %a1, <4 x i32> %idx, <2 x i64> %mask, i8 2) ;
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, i8*,
<4 x i32>, <2 x i64>, i8) nounwind readonly
define <4 x i64> @test_x86_avx2_gather_d_q_256(<4 x i64> %a0, i8* %a1,
<4 x i32> %idx, <4 x i64> %mask) {
; CHECK: vpgatherdq
%res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0,
i8* %a1, <4 x i32> %idx, <4 x i64> %mask, i8 2) ;
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, i8*,
<4 x i32>, <4 x i64>, i8) nounwind readonly
define <2 x i64> @test_x86_avx2_gather_q_q(<2 x i64> %a0, i8* %a1,
<2 x i64> %idx, <2 x i64> %mask) {
; CHECK: vpgatherqq
%res = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0,
i8* %a1, <2 x i64> %idx, <2 x i64> %mask, i8 2) ;
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, i8*,
<2 x i64>, <2 x i64>, i8) nounwind readonly
define <4 x i64> @test_x86_avx2_gather_q_q_256(<4 x i64> %a0, i8* %a1,
<4 x i64> %idx, <4 x i64> %mask) {
; CHECK: vpgatherqq
%res = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0,
i8* %a1, <4 x i64> %idx, <4 x i64> %mask, i8 2) ;
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, i8*,
<4 x i64>, <4 x i64>, i8) nounwind readonly
define <4 x i32> @test_x86_avx2_gather_d_d(<4 x i32> %a0, i8* %a1,
<4 x i32> %idx, <4 x i32> %mask) {
; CHECK: vpgatherdd
%res = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %a0,
i8* %a1, <4 x i32> %idx, <4 x i32> %mask, i8 2) ;
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*,
<4 x i32>, <4 x i32>, i8) nounwind readonly
define <8 x i32> @test_x86_avx2_gather_d_d_256(<8 x i32> %a0, i8* %a1,
<8 x i32> %idx, <8 x i32> %mask) {
; CHECK: vpgatherdd
%res = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> %a0,
i8* %a1, <8 x i32> %idx, <8 x i32> %mask, i8 2) ;
ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*,
<8 x i32>, <8 x i32>, i8) nounwind readonly
define <4 x i32> @test_x86_avx2_gather_q_d(<4 x i32> %a0, i8* %a1,
<2 x i64> %idx, <4 x i32> %mask) {
; CHECK: vpgatherqd
%res = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> %a0,
i8* %a1, <2 x i64> %idx, <4 x i32> %mask, i8 2) ;
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, i8*,
<2 x i64>, <4 x i32>, i8) nounwind readonly
define <4 x i32> @test_x86_avx2_gather_q_d_256(<4 x i32> %a0, i8* %a1,
<4 x i64> %idx, <4 x i32> %mask) {
; CHECK: vpgatherqd
%res = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %a0,
i8* %a1, <4 x i64> %idx, <4 x i32> %mask, i8 2) ;
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*,
<4 x i64>, <4 x i32>, i8) nounwind readonly

View File

@ -728,9 +728,27 @@
# CHECK: vgatherdpd %xmm0, (%rdi,%xmm1,2), %xmm2
0xc4 0xe2 0xe9 0x92 0x04 0x4f
# CHECK: vgatherqps %ymm8, (%r15,%ymm9,2), %ymm10
# CHECK: vgatherdpd %ymm0, (%rdi,%xmm1,2), %ymm2
0xc4 0xe2 0xed 0x92 0x04 0x4f
# CHECK: vgatherqps %xmm8, (%r15,%xmm9,2), %xmm10
0xc4 0x02 0x29 0x93 0x04 0x4f
# CHECK: vgatherqps %xmm8, (%r15,%ymm9,2), %xmm10
0xc4 0x02 0x2d 0x93 0x04 0x4f
# CHECK: vpgatherdq %xmm0, (%rdi,%xmm1,2), %xmm2
0xc4 0xe2 0xe9 0x90 0x04 0x4f
# CHECK: vpgatherdq %ymm0, (%rdi,%xmm1,2), %ymm2
0xc4 0xe2 0xed 0x90 0x04 0x4f
# CHECK: vpgatherqd %xmm8, (%r15,%xmm9,2), %xmm10
0xc4 0x02 0x29 0x91 0x04 0x4f
# CHECK: vpgatherqd %xmm8, (%r15,%ymm9,2), %xmm10
0xc4 0x02 0x2d 0x91 0x04 0x4f
# rdar://8812056 lldb doesn't print the x86 lock prefix when disassembling
# CHECK: lock
# CHECK-NEXT: xaddq %rcx, %rbx

View File

@ -4126,6 +4126,30 @@ _foo2:
// CHECK: encoding: [0xc4,0xe2,0xe9,0x92,0x04,0x4f]
vgatherdpd %xmm0, (%rdi,%xmm1,2), %xmm2
// CHECK: vgatherqps %ymm8, (%r15,%ymm9,2), %ymm10
// CHECK: vgatherdpd %ymm0, (%rdi,%xmm1,2), %ymm2
// CHECK: encoding: [0xc4,0xe2,0xed,0x92,0x04,0x4f]
vgatherdpd %ymm0, (%rdi,%xmm1,2), %ymm2
// CHECK: vgatherqps %xmm8, (%r15,%xmm9,2), %xmm10
// CHECK: encoding: [0xc4,0x02,0x29,0x93,0x04,0x4f]
vgatherqps %xmm8, (%r15,%xmm9,2), %xmm10
// CHECK: vgatherqps %xmm8, (%r15,%ymm9,2), %xmm10
// CHECK: encoding: [0xc4,0x02,0x2d,0x93,0x04,0x4f]
vgatherqps %ymm8, (%r15,%ymm9,2), %ymm10
vgatherqps %xmm8, (%r15,%ymm9,2), %xmm10
// CHECK: vpgatherdq %xmm0, (%rdi,%xmm1,2), %xmm2
// CHECK: encoding: [0xc4,0xe2,0xe9,0x90,0x04,0x4f]
vpgatherdq %xmm0, (%rdi,%xmm1,2), %xmm2
// CHECK: vpgatherdq %ymm0, (%rdi,%xmm1,2), %ymm2
// CHECK: encoding: [0xc4,0xe2,0xed,0x90,0x04,0x4f]
vpgatherdq %ymm0, (%rdi,%xmm1,2), %ymm2
// CHECK: vpgatherqd %xmm8, (%r15,%xmm9,2), %xmm10
// CHECK: encoding: [0xc4,0x02,0x29,0x91,0x04,0x4f]
vpgatherqd %xmm8, (%r15,%xmm9,2), %xmm10
// CHECK: vpgatherqd %xmm8, (%r15,%ymm9,2), %xmm10
// CHECK: encoding: [0xc4,0x02,0x2d,0x91,0x04,0x4f]
vpgatherqd %xmm8, (%r15,%ymm9,2), %xmm10