mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-06-13 04:38:24 +00:00
Add AVX support for fpextend.
Original patch by Syoyo Fujita with more comments by me. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@133153 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@ -2063,6 +2063,15 @@ def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
|
|||||||
def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
|
def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
|
||||||
(f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
|
(f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
|
||||||
|
|
||||||
|
// FIXME: According to the Intel manual, DEST[127:64] <- SRC1[127:64], while
|
||||||
|
// in the non-AVX version bits 127:64 aren't touched. Find a better way to
|
||||||
|
// represent this instead of always zeroing SRC1. One possible solution is
|
||||||
|
// to represent the instruction with something similar to the "$src1 = $dst"
|
||||||
|
// constraint but without the tied operands.
|
||||||
|
def : Pat<(extloadf32 addr:$src),
|
||||||
|
(VCVTSS2SDrm (f32 (EXTRACT_SUBREG (AVX_SET0PS), sub_ss)), addr:$src)>,
|
||||||
|
Requires<[HasAVX, OptForSpeed]>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// SSE 1 & 2 - Load/Store XCSR register
|
// SSE 1 & 2 - Load/Store XCSR register
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
@ -3589,6 +3598,16 @@ let Predicates = [HasSSE2] in
|
|||||||
def : Pat<(fextend (loadf32 addr:$src)),
|
def : Pat<(fextend (loadf32 addr:$src)),
|
||||||
(CVTSS2SDrm addr:$src)>;
|
(CVTSS2SDrm addr:$src)>;
|
||||||
|
|
||||||
|
// FIXME: According to the Intel manual, DEST[127:64] <- SRC1[127:64], while
|
||||||
|
// in the non-AVX version bits 127:64 aren't touched. Find a better way to
|
||||||
|
// represent this instead of always zeroing SRC1. One possible solution is
|
||||||
|
// to represent the instruction with something similar to the "$src1 = $dst"
|
||||||
|
// constraint but without the tied operands.
|
||||||
|
let Predicates = [HasAVX] in
|
||||||
|
def : Pat<(fextend (loadf32 addr:$src)),
|
||||||
|
(VCVTSS2SDrm (f32 (EXTRACT_SUBREG (AVX_SET0PS), sub_ss)),
|
||||||
|
addr:$src)>;
|
||||||
|
|
||||||
// bit_convert
|
// bit_convert
|
||||||
let Predicates = [HasXMMInt] in {
|
let Predicates = [HasXMMInt] in {
|
||||||
def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
|
def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
|
||||||
|
@ -10,3 +10,13 @@ entry:
|
|||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define void @fpext() nounwind uwtable {
|
||||||
|
entry:
|
||||||
|
%f = alloca float, align 4
|
||||||
|
%d = alloca double, align 8
|
||||||
|
%tmp = load float* %f, align 4
|
||||||
|
; CHECK: vcvtss2sd
|
||||||
|
%conv = fpext float %tmp to double
|
||||||
|
store double %conv, double* %d, align 8
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
Reference in New Issue
Block a user