Add AVX support for fpextend.

Original patch by Syoyo Fujita with more comments by me. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@133153 91177308-0d34-0410-b5e6-96231b3b80d8
2025-06-13 04:38:24 +00:00 · 2011-06-16 07:03:21 +00:00
parent 2559011a01
commit d381a7a91e
2 changed files with 29 additions and 0 deletions
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@ -2063,6 +2063,15 @@ def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
 // Select extraction of element 0 of a v4f32 (as an f32) to an EXTRACT_SUBREG
 // of the source VR128 register using the sub_ss subregister index.
 def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
          (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
 // AVX: select an extending f32 load (extloadf32) to the memory-operand form
 // VCVTSS2SDrm. The register source operand is given the sub_ss part of a
 // zeroed register (AVX_SET0PS). Only used when both HasAVX and OptForSpeed
 // hold.
 // FIXME: According to the Intel manual, DEST[127:64] <- SRC1[127:64], while
 // in the non-AVX version bits 127:64 aren't touched. Find a better way to
 // represent this instead of always zeroing SRC1. One possible solution is
 // to represent the instruction with something similar to the "$src1 = $dst"
 // constraint but without the tied operands.
 def : Pat<(extloadf32 addr:$src),
          (VCVTSS2SDrm (f32 (EXTRACT_SUBREG (AVX_SET0PS), sub_ss)), addr:$src)>,
      Requires<[HasAVX, OptForSpeed]>;
 //===----------------------------------------------------------------------===//
 // SSE 1 & 2 - Load/Store XCSR register
 //===----------------------------------------------------------------------===//
@ -3589,6 +3598,16 @@ let Predicates = [HasSSE2] in
 // Select fextend of a loaded f32 directly to CVTSS2SDrm, passing the load's
 // address as the instruction's memory operand.
 // NOTE(review): the governing "let Predicates = [HasSSE2] in" is only visible
 // in the hunk header here -- confirm against the full file.
 def : Pat<(fextend (loadf32 addr:$src)),
           (CVTSS2SDrm addr:$src)>;
 // AVX counterpart of the fextend-of-load pattern: select fextend of a loaded
 // f32 to VCVTSS2SDrm. The register source operand is given the sub_ss part of
 // a zeroed register (AVX_SET0PS); the load address is the memory operand.
 // FIXME: According to the Intel manual, DEST[127:64] <- SRC1[127:64], while
 // in the non-AVX version bits 127:64 aren't touched. Find a better way to
 // represent this instead of always zeroing SRC1. One possible solution is
 // to represent the instruction with something similar to the "$src1 = $dst"
 // constraint but without the tied operands.
 let Predicates = [HasAVX] in
 def : Pat<(fextend (loadf32 addr:$src)),
           (VCVTSS2SDrm (f32 (EXTRACT_SUBREG (AVX_SET0PS), sub_ss)),
                        addr:$src)>;
 // bit_convert
 let Predicates = [HasXMMInt] in {
  def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
--- a/test/CodeGen/X86/avx-128.ll
+++ b/test/CodeGen/X86/avx-128.ll
@ -10,3 +10,13 @@ entry:
  ret void
 }
 ; Test: a float loaded from a stack slot and extended to double should be
 ; emitted as vcvtss2sd (see the FileCheck directive inside the body).
 ; NOTE(review): the RUN line and target flags are outside this view -- confirm
 ; the test is run with AVX enabled.
 define void @fpext() nounwind uwtable {
 entry:
  ; Stack slots for the float source and the double destination.
  %f = alloca float, align 4
  %d = alloca double, align 8
  ; The slot is never stored to; only the load + fpext shape matters here.
  %tmp = load float* %f, align 4
  ; CHECK: vcvtss2sd
  %conv = fpext float %tmp to double
  ; Store the extended value to the double slot.
  store double %conv, double* %d, align 8
  ret void
 }