mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-22 13:29:44 +00:00
Fix SSE4.1 roundss, roundsd. While the instructions have the same pattern as roundpd/roundps, the Intel compiler builtins do not: rounds* has an extra operand.
Fixes gcc.target/i386/sse4_1-rounds[sd]-[1234].c
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@57370 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
fc19fbd2ca
commit
e397acce9d
@ -685,13 +685,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
|||||||
// FP rounding ops
|
// FP rounding ops
|
||||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||||
def int_x86_sse41_round_ss : GCCBuiltin<"__builtin_ia32_roundss">,
|
def int_x86_sse41_round_ss : GCCBuiltin<"__builtin_ia32_roundss">,
|
||||||
Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty,
|
Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
|
||||||
llvm_i32_ty], [IntrNoMem]>;
|
llvm_i32_ty], [IntrNoMem]>;
|
||||||
def int_x86_sse41_round_ps : GCCBuiltin<"__builtin_ia32_roundps">,
|
def int_x86_sse41_round_ps : GCCBuiltin<"__builtin_ia32_roundps">,
|
||||||
Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty,
|
Intrinsic<[llvm_v4f32_ty, llvm_v4f32_ty,
|
||||||
llvm_i32_ty], [IntrNoMem]>;
|
llvm_i32_ty], [IntrNoMem]>;
|
||||||
def int_x86_sse41_round_sd : GCCBuiltin<"__builtin_ia32_roundsd">,
|
def int_x86_sse41_round_sd : GCCBuiltin<"__builtin_ia32_roundsd">,
|
||||||
Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty,
|
Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
|
||||||
llvm_i32_ty], [IntrNoMem]>;
|
llvm_i32_ty], [IntrNoMem]>;
|
||||||
def int_x86_sse41_round_pd : GCCBuiltin<"__builtin_ia32_roundpd">,
|
def int_x86_sse41_round_pd : GCCBuiltin<"__builtin_ia32_roundpd">,
|
||||||
Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty,
|
Intrinsic<[llvm_v2f64_ty, llvm_v2f64_ty,
|
||||||
|
@ -3169,29 +3169,11 @@ def : Pat<(store (v16i8 VR128:$src), addr:$dst),
|
|||||||
// SSE4.1 Instructions
|
// SSE4.1 Instructions
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
multiclass sse41_fp_unop_rm<bits<8> opcss, bits<8> opcps,
|
multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd,
|
||||||
bits<8> opcsd, bits<8> opcpd,
|
|
||||||
string OpcodeStr,
|
string OpcodeStr,
|
||||||
Intrinsic F32Int,
|
|
||||||
Intrinsic V4F32Int,
|
Intrinsic V4F32Int,
|
||||||
Intrinsic F64Int,
|
|
||||||
Intrinsic V2F64Int> {
|
Intrinsic V2F64Int> {
|
||||||
// Intrinsic operation, reg.
|
// Intrinsic operation, reg.
|
||||||
def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
|
|
||||||
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
|
|
||||||
!strconcat(OpcodeStr,
|
|
||||||
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
|
||||||
[(set VR128:$dst, (F32Int VR128:$src1, imm:$src2))]>,
|
|
||||||
OpSize;
|
|
||||||
|
|
||||||
// Intrinsic operation, mem.
|
|
||||||
def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
|
|
||||||
(outs VR128:$dst), (ins ssmem:$src1, i32i8imm:$src2),
|
|
||||||
!strconcat(OpcodeStr,
|
|
||||||
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
|
||||||
[(set VR128:$dst, (F32Int sse_load_f32:$src1, imm:$src2))]>,
|
|
||||||
OpSize;
|
|
||||||
|
|
||||||
// Vector intrinsic operation, reg
|
// Vector intrinsic operation, reg
|
||||||
def PSr_Int : SS4AIi8<opcps, MRMSrcReg,
|
def PSr_Int : SS4AIi8<opcps, MRMSrcReg,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
|
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
|
||||||
@ -3209,22 +3191,6 @@ multiclass sse41_fp_unop_rm<bits<8> opcss, bits<8> opcps,
|
|||||||
(V4F32Int (memopv4f32 addr:$src1),imm:$src2))]>,
|
(V4F32Int (memopv4f32 addr:$src1),imm:$src2))]>,
|
||||||
OpSize;
|
OpSize;
|
||||||
|
|
||||||
// Intrinsic operation, reg.
|
|
||||||
def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
|
|
||||||
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
|
|
||||||
!strconcat(OpcodeStr,
|
|
||||||
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
|
||||||
[(set VR128:$dst, (F64Int VR128:$src1, imm:$src2))]>,
|
|
||||||
OpSize;
|
|
||||||
|
|
||||||
// Intrinsic operation, mem.
|
|
||||||
def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
|
|
||||||
(outs VR128:$dst), (ins sdmem:$src1, i32i8imm:$src2),
|
|
||||||
!strconcat(OpcodeStr,
|
|
||||||
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
|
||||||
[(set VR128:$dst, (F64Int sse_load_f64:$src1, imm:$src2))]>,
|
|
||||||
OpSize;
|
|
||||||
|
|
||||||
// Vector intrinsic operation, reg
|
// Vector intrinsic operation, reg
|
||||||
def PDr_Int : SS4AIi8<opcpd, MRMSrcReg,
|
def PDr_Int : SS4AIi8<opcpd, MRMSrcReg,
|
||||||
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
|
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
|
||||||
@ -3243,10 +3209,58 @@ multiclass sse41_fp_unop_rm<bits<8> opcss, bits<8> opcps,
|
|||||||
OpSize;
|
OpSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let Constraints = "$src1 = $dst" in {
|
||||||
|
multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
|
||||||
|
string OpcodeStr,
|
||||||
|
Intrinsic F32Int,
|
||||||
|
Intrinsic F64Int> {
|
||||||
|
// Intrinsic operation, reg.
|
||||||
|
def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
|
||||||
|
(outs VR128:$dst),
|
||||||
|
(ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
|
||||||
|
!strconcat(OpcodeStr,
|
||||||
|
"ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||||
|
[(set VR128:$dst,
|
||||||
|
(F32Int VR128:$src1, VR128:$src2, imm:$src3))]>,
|
||||||
|
OpSize;
|
||||||
|
|
||||||
|
// Intrinsic operation, mem.
|
||||||
|
def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
|
||||||
|
(outs VR128:$dst),
|
||||||
|
(ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
|
||||||
|
!strconcat(OpcodeStr,
|
||||||
|
"ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||||
|
[(set VR128:$dst,
|
||||||
|
(F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
|
||||||
|
OpSize;
|
||||||
|
|
||||||
|
// Intrinsic operation, reg.
|
||||||
|
def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
|
||||||
|
(outs VR128:$dst),
|
||||||
|
(ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
|
||||||
|
!strconcat(OpcodeStr,
|
||||||
|
"sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||||
|
[(set VR128:$dst,
|
||||||
|
(F64Int VR128:$src1, VR128:$src2, imm:$src3))]>,
|
||||||
|
OpSize;
|
||||||
|
|
||||||
|
// Intrinsic operation, mem.
|
||||||
|
def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
|
||||||
|
(outs VR128:$dst),
|
||||||
|
(ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
|
||||||
|
!strconcat(OpcodeStr,
|
||||||
|
"sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
|
||||||
|
[(set VR128:$dst,
|
||||||
|
(F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
|
||||||
|
OpSize;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// FP round - roundss, roundps, roundsd, roundpd
|
// FP round - roundss, roundps, roundsd, roundpd
|
||||||
defm ROUND : sse41_fp_unop_rm<0x0A, 0x08, 0x0B, 0x09, "round",
|
defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round",
|
||||||
int_x86_sse41_round_ss, int_x86_sse41_round_ps,
|
int_x86_sse41_round_ps, int_x86_sse41_round_pd>;
|
||||||
int_x86_sse41_round_sd, int_x86_sse41_round_pd>;
|
defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
|
||||||
|
int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
|
||||||
|
|
||||||
// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
|
// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
|
||||||
multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
|
multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user