Add missing AVX convert instructions. These instructions are not described in their SSE forms (although those forms exist), but add the AVX forms anyway so that the assembler can benefit from them.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@109039 91177308-0d34-0410-b5e6-96231b3b80d8
Bruno Cardoso Lopes 2010-07-21 21:37:59 +00:00
parent e62b690607
commit e29f37f6a1
3 changed files with 128 additions and 22 deletions
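
As a quick illustration of what the assembler gains, here is a small sample of the forms it accepts after this change, taken from the new tests below (AT&T syntax; the operand choices are arbitrary):

    vcvttsd2si %xmm4, %rcx           // rr form: width inferred from %rcx (64-bit)
    vcvtsi2sdl (%ebp), %xmm0, %xmm7  // memory form: explicit "l" = 32-bit integer
    vcvtsi2sdq (%rcx), %xmm4, %xmm6  // memory form: explicit "q" = 64-bit integer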

View File

@@ -516,6 +516,14 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                   [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>;
 }
 
+multiclass sse12_cvt_s_np<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+                          X86MemOperand x86memop, string asm> {
+  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+              []>;
+  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+              []>;
+}
+
 multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                        SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
                        string asm, Domain d> {
@@ -526,25 +534,40 @@ multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
 }
 
 multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
-                          SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
-                          string asm> {
+                          X86MemOperand x86memop, string asm> {
   def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
-              asm, []>;
+              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
   def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
-              (ins DstRC:$src1, x86memop:$src), asm, []>;
+              (ins DstRC:$src1, x86memop:$src),
+              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
 }
 
 let isAsmParserOnly = 1 in {
-defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
-                      "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX;
-defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
-                      "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
-defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
-                      "cvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}">, XS,
-                      VEX_4V;
-defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
-                      "cvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}">, XD,
-                      VEX_4V;
+defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
+                      "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX;
+defm VCVTTSS2SIr64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
+                      "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
+                      VEX_W;
+defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
+                      "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
+defm VCVTTSD2SIr64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
+                      "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX,
+                      VEX_W;
+
+// The assembler can recognize rr 64-bit instructions by seeing a rxx
+// register, but the same isn't true when only using memory operands,
+// so provide explicit "l" and "q" assembly forms to address this
+// where appropriate.
+defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">, XS,
+                      VEX_4V;
+defm VCVTSI2SSQ : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ssq">, XS,
+                      VEX_4V, VEX_W;
+defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">, XD,
+                      VEX_4V;
+defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sdl">, XD,
+                      VEX_4V;
+defm VCVTSI2SDQ : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sdq">, XD,
+                      VEX_4V, VEX_W;
 }
 
 defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
@@ -603,6 +626,14 @@ let isAsmParserOnly = 1 in {
 defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
                       f128mem, load, "cvtsd2si\t{$src, $dst|$dst, $src}">, XD,
                       VEX;
+// FIXME: The asm matcher has a hack to ignore instructions with _Int and Int_
+// in their names. Get rid of this hack or rename the intrinsics; there are
+// several instructions that only match with the intrinsic form, so why create
+// duplicates just to let the assembler recognize them?
+defm VCVTSD2SI_alt : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem,
+                      "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
+defm VCVTSD2SI64 : sse12_cvt_s_np<0x2D, FR64, GR64, f64mem,
+                      "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W;
 }
 defm Int_CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
                       f32mem, load, "cvtss2si\t{$src, $dst|$dst, $src}">, XS;
@@ -661,14 +692,17 @@ defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
                       XD;
 
 let isAsmParserOnly = 1, Pattern = []<dag> in {
-defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
-                      "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS, VEX;
-defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load,
-                      "cvtdq2ps\t{$src, $dst|$dst, $src}",
-                      SSEPackedSingle>, TB, VEX;
-defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, f256mem, load,
-                      "cvtdq2ps\t{$src, $dst|$dst, $src}",
-                      SSEPackedSingle>, TB, VEX;
+defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
+                      "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS, VEX;
+defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load,
+                      "cvtss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
+                      VEX_W;
+defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load,
+                      "cvtdq2ps\t{$src, $dst|$dst, $src}",
+                      SSEPackedSingle>, TB, VEX;
+defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, f256mem, load,
+                      "cvtdq2ps\t{$src, $dst|$dst, $src}",
+                      SSEPackedSingle>, TB, VEX;
 }
 let Pattern = []<dag> in {
 defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/,
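
To make the "l"/"q" comment in the hunk above concrete: with a register source the matcher can pick the 32- or 64-bit convert from the general-purpose register itself, but a memory operand carries no width, so the suffixed mnemonics defined here are what disambiguate. A minimal sketch (registers chosen only for illustration):

    vcvtsi2sd %eax, %xmm0, %xmm0     // GR32 source: 32-bit convert, inferred
    vcvtsi2sd %rax, %xmm0, %xmm0     // GR64 source: 64-bit convert, inferred
    vcvtsi2sdl (%rax), %xmm0, %xmm0  // memory source: width comes from the "l"
    vcvtsi2sdq (%rax), %xmm0, %xmm0  // memory source: width comes from the "q"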

View File

@@ -13142,3 +13142,19 @@
 // CHECK: vzeroupper
 // CHECK: encoding: [0xc5,0xf8,0x77]
           vzeroupper
+
+// CHECK: vcvtsd2si %xmm4, %ecx
+// CHECK: encoding: [0xc5,0xfb,0x2d,0xcc]
+          vcvtsd2si %xmm4, %ecx
+
+// CHECK: vcvtsd2si (%ecx), %ecx
+// CHECK: encoding: [0xc5,0xfb,0x2d,0x09]
+          vcvtsd2si (%ecx), %ecx
+
+// CHECK: vcvtsi2sdl (%ebp), %xmm0, %xmm7
+// CHECK: encoding: [0xc5,0xfb,0x2a,0x7d,0x00]
+          vcvtsi2sdl (%ebp), %xmm0, %xmm7
+
+// CHECK: vcvtsi2sdl (%esp), %xmm0, %xmm7
+// CHECK: encoding: [0xc5,0xfb,0x2a,0x3c,0x24]
+          vcvtsi2sdl (%esp), %xmm0, %xmm7
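
The vcvtsd2si tests above are the ones that previously matched only via the intrinsic (Int_) patterns and now go through the plain VCVTSD2SI_alt definition from the first diff; in 64-bit mode the same mnemonic also resolves to the GR64 variant. A sketch (register choices are arbitrary):

    vcvtsd2si %xmm4, %ecx  // 32-bit result: matches VCVTSD2SI_alt
    vcvtsd2si %xmm4, %rcx  // 64-bit result: matches VCVTSD2SI64 (VEX.W), 64-bit mode only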

View File

@@ -3208,3 +3208,59 @@ pshufb CPI1_0(%rip), %xmm1
 // CHECK: vperm2f128 $7, (%rax), %ymm10, %ymm11
 // CHECK: encoding: [0xc4,0x63,0x2d,0x06,0x18,0x07]
           vperm2f128 $7, (%rax), %ymm10, %ymm11
+
+// CHECK: vcvtsd2si %xmm8, %r8d
+// CHECK: encoding: [0xc4,0x41,0x7b,0x2d,0xc0]
+          vcvtsd2si %xmm8, %r8d
+
+// CHECK: vcvtsd2si (%rcx), %ecx
+// CHECK: encoding: [0xc5,0xfb,0x2d,0x09]
+          vcvtsd2si (%rcx), %ecx
+
+// CHECK: vcvtss2si %xmm4, %rcx
+// CHECK: encoding: [0xc4,0xe1,0xfa,0x2d,0xcc]
+          vcvtss2si %xmm4, %rcx
+
+// CHECK: vcvtss2si (%rcx), %r8
+// CHECK: encoding: [0xc4,0x61,0xfa,0x2d,0x01]
+          vcvtss2si (%rcx), %r8
+
+// CHECK: vcvtsi2sdl %r8d, %xmm8, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x3b,0x2a,0xf8]
+          vcvtsi2sdl %r8d, %xmm8, %xmm15
+
+// CHECK: vcvtsi2sdl (%rbp), %xmm8, %xmm15
+// CHECK: encoding: [0xc5,0x3b,0x2a,0x7d,0x00]
+          vcvtsi2sdl (%rbp), %xmm8, %xmm15
+
+// CHECK: vcvtsi2sdq %rcx, %xmm4, %xmm6
+// CHECK: encoding: [0xc4,0xe1,0xdb,0x2a,0xf1]
+          vcvtsi2sdq %rcx, %xmm4, %xmm6
+
+// CHECK: vcvtsi2sdq (%rcx), %xmm4, %xmm6
+// CHECK: encoding: [0xc4,0xe1,0xdb,0x2a,0x31]
+          vcvtsi2sdq (%rcx), %xmm4, %xmm6
+
+// CHECK: vcvtsi2ssq %rcx, %xmm4, %xmm6
+// CHECK: encoding: [0xc4,0xe1,0xda,0x2a,0xf1]
+          vcvtsi2ssq %rcx, %xmm4, %xmm6
+
+// CHECK: vcvtsi2ssq (%rcx), %xmm4, %xmm6
+// CHECK: encoding: [0xc4,0xe1,0xda,0x2a,0x31]
+          vcvtsi2ssq (%rcx), %xmm4, %xmm6
+
+// CHECK: vcvttsd2si %xmm4, %rcx
+// CHECK: encoding: [0xc4,0xe1,0xfb,0x2c,0xcc]
+          vcvttsd2si %xmm4, %rcx
+
+// CHECK: vcvttsd2si (%rcx), %rcx
+// CHECK: encoding: [0xc4,0xe1,0xfb,0x2c,0x09]
+          vcvttsd2si (%rcx), %rcx
+
+// CHECK: vcvttss2si %xmm4, %rcx
+// CHECK: encoding: [0xc4,0xe1,0xfa,0x2c,0xcc]
+          vcvttss2si %xmm4, %rcx
+
+// CHECK: vcvttss2si (%rcx), %rcx
+// CHECK: encoding: [0xc4,0xe1,0xfa,0x2c,0x09]
+          vcvttss2si (%rcx), %rcx
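
For reference, encoding tests like these are driven by a RUN line at the top of the .s file, roughly of this shape (the exact triple in the real test file may differ):

    // RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s

FileCheck then matches each "// CHECK:" pair against the instruction and encoding bytes that llvm-mc prints.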